diff -Nur 2.4.18/include/linux/sockios.h 2.4.18/include/linux/sockios.h
--- 2.4.18/include/linux/sockios.h Wed Nov 7 17:39:36 2001
+++ 2.4.18/include/linux/sockios.h Wed Jun 5 15:55:54 2002
@@ -113,6 +113,10 @@
#define SIOCBONDSLAVEINFOQUERY 0x8993 /* rtn info about slave state */
#define SIOCBONDINFOQUERY 0x8994 /* rtn info about bond state */
#define SIOCBONDCHANGEACTIVE 0x8995 /* update to a new active slave */
+
+/* per-socket statistics manipulation */
+#define GIOCSOCKSTATS 0x8996 /* get the per-socket statistics */
+#define SIOCZEROSOCKSTATS 0x8997 /* zero out the per-socket statistics */
/* Device private ioctl calls */
diff -Nur 2.4.18/include/net/sock.h 2.4.18/include/net/sock.h
--- 2.4.18/include/net/sock.h Thu May 2 15:32:20 2002
+++ 2.4.18/include/net/sock.h Wed Jun 5 15:58:24 2002
@@ -480,6 +480,16 @@
wait_queue_head_t wq;
} socket_lock_t;
+/* Per-socket receive statistics. Both counters are bumped in
+ * sock_queue_rcv_skb() and sock_queue_err_skb(); 'received' is incremented
+ * before the receive-buffer check, so it also includes dropped skbuffs.
+ */
+typedef struct {
+ __u64 received; /* skbuffs offered to the socket, dropped or not */
+ __u32 dropped_no_mem; /* skbuffs rejected with -ENOMEM: rcvbuf full */
+} socket_stats;
+
+
#define sock_lock_init(__sk) \
do { spin_lock_init(&((__sk)->lock.slock)); \
(__sk)->lock.users = 0; \
@@ -678,6 +688,10 @@
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
void (*destruct)(struct sock *sk);
+
+
+ /* per-socket statistics */
+ socket_stats stats;
};
/* The per-socket spinlock must be held here. */
@@ -1145,11 +1159,15 @@
static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ sk->stats.received++;
+
/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
- if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
+ if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
+ sk->stats.dropped_no_mem++;
return -ENOMEM;
+ }
#ifdef CONFIG_FILTER
if (sk->filter) {
@@ -1179,11 +1197,16 @@
static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
+ sk->stats.received++;
+
/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
- if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
+ if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
+ sk->stats.dropped_no_mem++;
return -ENOMEM;
+ }
+
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->error_queue,skb);
if (!sk->dead)
diff -Nur 2.4.18/net/core/sock.c 2.4.18/net/core/sock.c
--- 2.4.18/net/core/sock.c Fri Dec 21 12:42:05 2001
+++ 2.4.18/net/core/sock.c Wed Jun 5 13:59:37 2002
@@ -1202,6 +1202,9 @@
sk->rcvlowat = 1;
sk->rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
+ sk->stats.received = 0;
+ sk->stats.dropped_no_mem = 0;
atomic_set(&sk->refcnt, 1);
}
diff -Nur 2.4.18/net/ipv4/af_inet.c 2.4.18/net/ipv4/af_inet.c
--- 2.4.18/net/ipv4/af_inet.c Fri Dec 21 12:42:05 2001
+++ 2.4.18/net/ipv4/af_inet.c Wed Jun 5 15:56:20 2002
@@ -834,6 +834,16 @@
int pid;
switch(cmd) {
+ case GIOCSOCKSTATS:
+ /* copy_to_user() returns the number of bytes left uncopied, not an errno */
+ return copy_to_user((void *)arg, &sk->stats, sizeof(sk->stats)) ? -EFAULT : 0;
+ case SIOCZEROSOCKSTATS:
+ /* resetting the counters is restricted to CAP_NET_ADMIN */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ sk->stats.dropped_no_mem = 0;
+ sk->stats.received = 0;
+ return 0;
case FIOSETOWN:
case SIOCSPGRP:
err = get_user(pid, (int *) arg);
> i know of many many folk who use transaction logs from HTTP caches for
> volume-based billing.
> right now, those bills are anywhere between 10% to 25% incorrect.
>
> you call that "extremely limited"?
It wouldn't help you anyway. Prove which frames were not due to the
overloading and congestion/errors on your network which therefore the customer should
not have a duty to pay. Account for bitstuffing on HDLC links...
g'day Alan,
At 03:03 AM 23/06/2002 +0100, you wrote:
> > i know of many many folk who use transaction logs from HTTP caches for
> > volume-based billing.
> > right now, those bills are anywhere between 10% to 25% incorrect.
> >
> > you call that "extremely limited"?
>
>It wouldnt help you anyway. Prove which frames were not due to the
>overloading and congestion/errors on your network which therefore the
>customer should
>not have a duty to pay. Account for bitstuffing on HDLC links...
sure - but these are all Layer-8 (politics) and layer-9 (religion) issues.
typically Service Providers on this side of the planet handle that side of
things via SLAs internal to their own network. i.e. "we guarantee X%
uptime, less than Y% packet-loss across our own core network as measured
using XXYYZZ method".
the fact that an IP packet may have a PPP header on it across one hop, a
HDLC header across another, perhaps some MPLS labels across another,
802.1q-in-802.1q across another is generally immaterial.
if you did want to get fancy and account for it, at least you have
packet-counters on a per-socket basis from which to do that with.
without per-socket accounting, you just don't have that anyway.
cheers,
lincoln.
Your idea is totally useless for non-datagram sockets.
Only datagram sockets use the interfaces where you bump
the counters.
I don't like the patch, nor the idea behind it, at all.
"David S. Miller" wrote:
>
> Your idea is totally useless for non-datagram sockets.
> Only datagram sockets use the interfaces where you bump
> the counters.
>
> I don't like the patch, nor the idea behind it, at all.
Thanks for the feedback.
I buy the point about it only making sense for datagram sockets in its current
form. Thus it would maybe make more sense to use udp_ioctl() rather than in the
generic socket ioctl.
However, what do you have against the basic idea of a program knowing how many
packets have
been dropped on its sockets? I added the feature to try and figure out where
packets were being dropped in an app I am developing, and so far it's been very
useful.
More generally, is there a generic place that I could tie into for the counter
increment that would work for all sockets? While tcp would automatically handle
the dropped packets, it might be useful to know how many there were.
Thanks,
Chris
--
Chris Friesen | MailStop: 043/33/F10
Nortel Networks | work: (613) 765-0557
3500 Carling Avenue | fax: (613) 765-2986
Nepean, ON K2H 8E9 Canada | email: [email protected]
Datagram sockets are the ones that drop data though (tcp will
deal with it via re-transmits).
I have not looked at his patch in detail, but I would welcome anything
that gets us closer to being able to account for every packet that enters
the NIC, or enters the kernel from user-space via send(to), etc...
David S. Miller wrote:
> Your idea is totally useless for non-datagram sockets.
> Only datagram sockets use the interfaces where you bump
> the counters.
>
> I don't like the patch, nor the idea behind it, at all.
>
>
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Fri, Jun 07, 2002 at 03:15:24PM -0700, Ben Greear wrote:
> David S. Miller wrote:
> > Your idea is totally useless for non-datagram sockets.
> > Only datagram sockets use the interfaces where you bump
> > the counters.
> > I don't like the patch, nor the idea behind it, at all.
> Datagram sockets are the ones that drop data though (tcp will
> deal with it via re-transmits).
Outside of the specific changes suggested by Chris, I can see a
requirement to be able to detect poor connections. While TCP/IP may
not drop packets from the perspective of user space applications,
TCP/IP packets do get lost. For certain applications that require high
bandwidth, or low latency, applications may be able to optimize code
paths by analyzing statistics related to the socket.
Datagram sockets are more straight forward to implement this for, but
that does not mean that TCP/IP does not have similar potential.
I am not certain what the exact requirement is for in Chris' cases,
but I do know that in his field, he is writing something far more
complicated and resource intensive than a telnet server.
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
You guys, we have SNMP statistics for these events, there
is no reason to have them per-socket. You cannot convince
me that when you are diagnosing a problem the SNMP stats
are not enough to show you if the packets are being dropped.
If not, this means we need to add more SNMP events, that is
all it means.
David S. Miller wrote:
> You guys we have SNMP statistics for these events, there
> is no reason to have them per-socket. You cannot convince
> me that when you are diagnosing a problem the SNMP stats
> are not enough to show you if the packets are being dropped.
So, I will not attempt to convince you that you need per-socket
counters. I do know for absolute certain that I would like to
have them (I write a traffic-generation & testing program).
For instance, when I run 50Mbps bi-directional on a P-4 1.6Ghz machine,
using a single port of a DFE-570tx NIC, then I drop around .2% of
the packets, in bursts. I have kernel buffers very large (2MB),
and the CPU is not maxed out.
With the current system, it is difficult for me to know exactly what
I need to change to get better performance and/or if better performance
is even possible.
> If not, this means we need to add more SNMP events, that is
> all it means.
If you're talking per-socket SNMP counters, then that could work.
General protocol-wide counters would not help much, at least
in my case.
Thanks,
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
From: Ben Greear <[email protected]>
Date: Sat, 08 Jun 2002 17:13:35 -0700
If you're talking per-socket SNMP counters, then that could work.
General protocol-wide counters would not help much, at least
in my case.
Why not? If you know where the drops are occurring, what else
do you need to know?
I'm not talking about per-socket SNMP counters, that would be
ridiculous.
On Sat, Jun 08, 2002 at 05:05:11PM -0400, Mark Mielke wrote:
> Datagram sockets are more straight forward to implement this for, but
> that does not mean that TCP/IP does not have similar potential.
>
> I am not certain what the exact requirement is for in Chris' cases,
> but I do know that in his field, he is writing something far more
> complicated and resource intensive than a telnet server.
Have you guys checked out if the TCP_INFO getsockopt() would
work for your needs? (obviously, it'll only work for TCP connections
). It gives you quite a bit of detail about what's happening
in your TCP connection (retransmissions, window sizes etc.).
printf("unacked: %d sacked: %d lost: %d retrans: %d fackets: %d\n",
info.tcpi_unacked,info.tcpi_sacked,info.tcpi_lost,
info.tcpi_retrans,info.tcpi_fackets);
printf("pmtu: %d rcv_ssthresh: %d rtt: %d rttvar: %d snd_ssthresh:
%d\nsnd_cwnd: %d advmss:
%d reordering: %d\n",info.tcpi_pmtu,info.tcpi_rcv_ssthresh,
info.tcpi_rtt,info.tcpi_rttvar,info.tcpi_snd_ssthresh,info.tcpi_snd_cwnd,info.tcpi_advmss,
info.tcpi_reordering);
--
M.Sc. (Eng.) Pekka Pietikainen, Nixu Oy
David S. Miller wrote:
> From: Ben Greear <[email protected]>
> Date: Sat, 08 Jun 2002 17:13:35 -0700
>
> If you're talking per-socket SNMP counters, then that could work.
> General protocol-wide counters would not help much, at least
> in my case.
>
> Why not? If you know where the drops are occurring, what else
> do you need to know?
I need to account for packets on a per-session basis, where a
session endpoint is a UDP port. So, knowing global protocol numbers is
good, but it is not very useful for the detailed accounting I
need. I could also use per-socket TCP counters, like re-transmits,
etc. I have not looked to see if they are already there
or not...
Thanks,
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
From: Ben Greear <[email protected]>
Date: Sun, 09 Jun 2002 11:23:30 -0700
I need to account for packets on a per-session basis, where a
session endpoint is a UDP port. So, knowing global protocol numbers is
good, but it is not very useful for the detailed accounting I
need.
Why can't you just disable the other UDP services, and then there is
no question which UDP server/client is causing the drops.
Every argument I hear is one out of laziness. And that is not a
reason to add something. Simply put, I don't want to add all of this
per-socket counter bumping that only, at best, 1 tenth of 1 percent
of people will use. This means that the rest of the world eats the
overhead just for this small group that actually uses it.
On Sun, Jun 09, 2002 at 09:34:40PM -0700, David S. Miller wrote:
> From: Ben Greear <[email protected]>
> Date: Sun, 09 Jun 2002 11:23:30 -0700
> I need to account for packets on a per-session basis, where a
> session endpoint is a UDP port. So, knowing global protocol numbers is
> good, but it is not very useful for the detailed accounting I
> need.
> Why can't you just disable the other UDP services, and then there is
> no question which UDP server/client is causing the drops.
If the application only had 10 or fewer, non-critical UDP ports
sending data, this conclusion might apply. However, even then, this
suggestion seems a little silly. "Why don't you call for a full stop
and then try them one by one?" is what I read this suggestion as
being.
> Every argument I hear is one out of lazyness. And that is not a
> reason to add something. Simply put, I don't want to add all of this
> per-socket counter bumping that only, at best, 1 tenth of 1 percent
> of people will use. This means that the rest of the world eats the
> overhead just for this small group that actually uses it.
Is it 'laziness' that the application needs to be able to minimize every
last CPU cycle, or is it 'optimization'?
To many designers, the determination that one should *be* lazy is
considered a virtue. The opposite extreme would suggest that "well
TCP/IP shouldn't be built into the kernel anyways... application
writers are just too lazy to implement the TCP/IP stack in user
space... it doesn't belong in the kernel..."
As for the "rest of the world eats the overhead just for this small group
that actually uses it"... this would be true... if every single Linux
kernel was built with the exact same configuration.
What am I saying? I haven't seen an effective argument against the
requirement, and I can see potential uses *for* the requirement.
Feel free to provide an effective argument against. :-)
Until then...
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
David S. Miller wrote:
> From: Ben Greear <[email protected]>
> Date: Sun, 09 Jun 2002 11:23:30 -0700
>
> I need to account for packets on a per-session basis, where a
> session endpoint is a UDP port. So, knowing global protocol numbers is
> good, but it is not very useful for the detailed accounting I
> need.
>
> Why can't you just disable the other UDP services, and then there is
> no question which UDP server/client is causing the drops.
I run multiple connections at once, so this is not a useful alternative.
My application is fairly unique, though people doing stuff like RTP and
other streaming UDP sessions may be interested in similar counters.
> Every argument I hear is one out of lazyness. And that is not a
Well, that pretty much finishes this discussion I guess.
Thanks,
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
At 09:34 PM 9/06/2002 -0700, David S. Miller wrote:
>Every argument I hear is one out of lazyness. And that is not a
>reason to add something. Simply put, I don't want to add all of this
>per-socket counter bumping that only, at best, 1 tenth of 1 percent
>of people will use. This means that the rest of the world eats the
>overhead just for this small group that actually uses it.
would you be willing to accept a patch that enables per-socket accounting
with a CONFIG_ option?
to my mind, i can see a number of perfectly valid scenarios.
one is for streaming-media applications which could use retransmissions as
an indication to buffer more data and/or switch to a different bitrate.
another is for a http proxy which has multiple outgoing interfaces which
are multihomed via different providers (and some via simplex satellite).
retransmissions would be a nice metric to use for determining the weightings
between using different interfaces.
cheers,
lincoln.
From: Lincoln Dale <[email protected]>
Date: Mon, 10 Jun 2002 22:03:25 +1000
would you be willing to accept a patch that enables per-socket
accounting with a CONFIG_ option?
What is the point?
If all the dists will enable it then everybody eats the overhead.
If the dists don't enable it, how useful is it and what's so wrong
with it being an external patch people just apply when they need to
diagnose something like this?
On Mon, 10 Jun 2002, David S. Miller wrote:
> From: Lincoln Dale <[email protected]>
> Date: Mon, 10 Jun 2002 22:03:25 +1000
>
> would you be willing to accept a patch that enables per-socket
> accounting with a CONFIG_ option?
>
> What is the point?
>
> If all the dists will enable it then everybody eats the overhead.
> If the dists don't enable it, how useful is it and what's so wrong
> with it being an external patch people just apply when they need to
> diagnose something like this?
>
I think i would agree with Dave for it to be an external patch. You
really only need this during debugging. I had a similar patch when
debugging NAPI about a year ago. I didn't find it that useful after
a while because i could deduce the losses from SNMP/netstat output.
cheers,
jamal
On Mon, Jun 10, 2002 at 08:24:44AM -0400, jamal wrote:
> On Mon, 10 Jun 2002, David S. Miller wrote:
> > From: Lincoln Dale <[email protected]>
> > Date: Mon, 10 Jun 2002 22:03:25 +1000
> > would you be willing to accept a patch that enables per-socket
> > accounting with a CONFIG_ option?
> > What is the point?
> > If all the dists will enable it then everybody eats the overhead.
> > If the dists don't enable it, how useful is it and what's so wrong
> > with it being an external patch people just apply when they need to
> > diagnose something like this?
> I think i would agree with Dave for it to be an external patch. You
> really only need this during debugging. I had a similar patch when
> debugging NAPI about a year ago. I didnt find it that useful after
> a while because i could deduce the losses from SNMP/netstat output.
In your case you found that you could solve it once by debugging the
application.
This doesn't mean that other applications would not be better at
determining the code path to use at execution time.
Just because eth1 is behaving perfectly (i.e. low overall dropped UDP
packets, or low TCP/IP retransmission) does not mean that a specific
socket currently on eth1 heading to China should assume that it can
take the 'average' observation as adequate for observing the specific
socket.
There *are* applications that would benefit from making this decision
at run time on a socket-by-socket basis. It is not a common requirement
for most desktop users, but it remains a valid requirement.
Providing it as a patch, can have the effect that it becomes more trouble
than it is worth to grant other people access to the feature, especially
from a corporate environment that has signed off on being able to release
patches made to Linux back to the Linux source tree.
Seems somewhat of a loss...
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
On Mon, 10 Jun 2002, Mark Mielke wrote:
> On Mon, Jun 10, 2002 at 08:24:44AM -0400, jamal wrote:
> > I think i would agree with Dave for it to be an external patch. You
> > really only need this during debugging. I had a similar patch when
> > debugging NAPI about a year ago. I didnt find it that useful after
> > a while because i could deduce the losses from SNMP/netstat output.
>
> In your case you found that you could solve it once by debugging the
> application.
>
> This doesn't mean that other applications would not be better at
> determining the code path to use at execution time.
>
> Just because eth1 is behaving perfectly (i.e. low overall dropped UDP
> packets, or low TCP/IP retransmission) does not mean that a specific
> socket currently on eth1 heading to China should assume that it can
> take the 'average' observation as adequate for observing the specific
> socket.
>
> There *are* applications that would benefit from making this decision
> at run time on a socket-by-socket basis. It is not a common requirement
> for most desktop users, but it remains a valid requirement.
>
I am confused as to which application needs this, do you have one in mind?
AFAIK, UDP/RTP type apps already know how to determine packet loss
on a per flow basis.
> Providing it as a patch, can have the effect that it becomes more trouble
> than it is worth to grant other people access to the feature, especially
> from a corporate environment that has signed off on being able to release
> patches made to Linux back to the Linux source tree.
>
You may be confusing technical merit to mean the same thing as corporate
donation. In Linux it's the latter that counts.
> Seems somewhat of a loss...
Your mileage may vary. Consider this - you have the opp to at least
make the patch available. Imagine trying to convince windriver.
cheers,
jamal
>
> mark
>
> --
> [email protected]/[email protected]/[email protected] __________________________
> . . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
> |\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
> | | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
>
> One ring to rule them all, one ring to find them, one ring to bring them all
> and in the darkness bind them...
>
> http://mark.mielke.cc/
>
On Mon, 10 Jun 2002, jamal wrote:
> You may be confusing technical merit to mean the same thing as corporate
> donation. In Linux its the later that counts.
Sorry meant the former.
cheers,
jamal
Would you (David) mind having a file in the kernel that has a
list/description of such patches and a URL to them? This is the kind of
patch I would like to know is available somewhere so I don't spend a
week looking for it when someone asks about it.
-d
> would you be willing to accept a patch that enables per-socket
> accounting with a CONFIG_ option?
>
>What is the point?
>
>If all the dists will enable it then everybody eats the overhead.
>If the dists don't enable it, how useful is it and what's so wrong
>with it being an external patch people just apply when they need to
>diagnose something like this?
>
Hi,
On Mon, 10 Jun 2002, David Ford wrote:
> Would you (David) mind having a file in the kernel that has a
> list/description of such patches and a URL to them? This is the kind of
> patch I would like to know is available somewhere so I don't spent a
> week looking for it when someone asks about it.
I'd rather suggest a dedicated website for it, such as the one at
kernelnewbies.org.
Regards,
Thunder
--
German attitude becoming | Thunder from the hill at ngforever
rightaway popular: |
"Get outa my way, | free inhabitant not directly
for I got a mobile phone!" | belonging anywhere
Then a list of websites with such material would be desired. I don't
know very many people that go to kernelnewbies to look for patches. It
isn't much to ask for and would give a great starting place for someone
looking for a specific answer.
-d
Thunder from the hill wrote:
>Hi,
>
>On Mon, 10 Jun 2002, David Ford wrote:
>
>
>>Would you (David) mind having a file in the kernel that has a
>>list/description of such patches and a URL to them? This is the kind of
>>patch I would like to know is available somewhere so I don't spent a
>>week looking for it when someone asks about it.
>>
>>
>
>I'd rather suggest a dedicated website for it, such as the one at
>kernelnewbies.org.
>
>Regards,
>Thunder
>
>
jamal wrote:
>
> On Mon, 10 Jun 2002, Mark Mielke wrote:
> > There *are* applications that would benefit from making this decision
> > at run time on a socket-by-socket basis. It is not a common requirement
> > for most desktop users, but it remains a valid requirement.
> >
>
> I am confused as to which application needs this, do you have one in mind?
> AFAIK, UDP/RTP type apps already know how to determine packet loss
> on a per flow basis.
The purpose of this patch is to make it really easy to nail down exactly how
many packets were dropped *per socket*, and for what reason. For me, the
information is then used to tune the application statically, but others could
use it dynamically. Incoming packets can be dropped at the device, at the
device driver, in netif_rx, or at the socket buffer. We've got stats on all of
these except for the socket buffer, so why not add them?
The cost in the normal case is incrementing a single variable in the socket
struct (which is likely already in cache since we're playing with it). I can't
see this being that expensive. In the failure path, we get a second increment.
Again, this is not going to be noticeable.
Sure, you can try and figure out which applications had sockets open, and how
many packets they missed, and subtract that from the snmp counters to give how
many packets you missed. But to do this you have to lock the box down--isn't it
a lot easier to just *know* because you've been keeping track?
Chris
--
Chris Friesen | MailStop: 043/33/F10
Nortel Networks | work: (613) 765-0557
3500 Carling Avenue | fax: (613) 765-2986
Nepean, ON K2H 8E9 Canada | email: [email protected]
On Sun, 9 Jun 2002, David S. Miller wrote:
> From: Ben Greear <[email protected]>
> Date: Sun, 09 Jun 2002 11:23:30 -0700
>
> I need to account for packets on a per-session basis, where a
> session endpoint is a UDP port. So, knowing global protocol numbers is
> good, but it is not very useful for the detailed accounting I
> need.
>
> Why can't you just disable the other UDP services, and then there is
> no question which UDP server/client is causing the drops.
Should be obvious that if a combination of load and client behaviour
cause the problem you will learn nothing.
> Every argument I hear is one out of laziness. And that is not a
> reason to add something. Simply put, I don't want to add all of this
> per-socket counter bumping that only, at best, 1 tenth of 1 percent
> of people will use. This means that the rest of the world eats the
> overhead just for this small group that actually uses it.
Actually your arguments sound like you have a solution to your problem
and you want everyone to use it even if it doesn't help them. Have you
some emotional tie to SNMP, like being an author?
There is no overhead unless the config option is selected, which would
be done in a normal kernel source, just as verbose messages, highmem
debugging, singing and dancing SYSREQ, debugging SCSI driver, and many,
many other features. So the argument against load is totally irrelevant.
I can't see why anyone would be against a feature just because they
don't personally use it, there is so much stuff of specialized use now,
that it sure sounds like existing practice. I even think that the
implementation is general and could be extended to gather other
per-connection stats which is a big plus in terms of design quality.
--
bill davidsen <[email protected]>
CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.
From: Bill Davidsen <[email protected]>
Date: Tue, 11 Jun 2002 18:41:16 -0400 (EDT)
Actually your arguments sound like you have a solution to your problem
and you want everyone to use it even if it doesn't help them. Have you
some emotional tie to SNMP, like being an author?
After a comment like this, I have no interest in listening to anything
else you have to say. I've been maintaining the Linux networking for
5 or more years now, and the most important thing I do is say no to
changes.
Franks a lot,
David S. Miller
[email protected]
On Tue, Jun 11, 2002 at 08:41:19PM -0700, David S. Miller wrote:
> From: Bill Davidsen <[email protected]>
> Date: Tue, 11 Jun 2002 18:41:16 -0400 (EDT)
>
> Actually your arguments sound like you have a solution to your problem
> and you want everyone to use it even if it doesn't help them. Have you
> some emotional tie to SNMP, like being an author?
Basically Bill, if you don't like this policy, fork the code. That is
one of the strengths (and weaknesses) of open source. If your tree
works better and gets wider use, then something about it must be better.
If not, then maybe it wasn't. This community works on reputation
capital (and some diplomacy).
> After a comment like this, I have no interest in listening to anything
> else you have to say. I've been maintaining the Linux networking for
> 5 or more years now, and the most important thing I do is say no to
> changes.
>
> Franks a lot,
> David S. Miller
> [email protected]
slainte mhath, RGB
--
Richard Guy Briggs -- ~\ Auto-Free Ottawa! Canada
<http://www.TriColour.net> -- \@ @ <http://www.flora.org/afo/>
No Internet Wiretapping! -- _\\/\%___\\/\% Vote! -- <Green.ca>
<http://www.FreeSWAN.org>_______GTVS6#790__(*)_______(*)(*)_______<www.Marillion.com>
On Tue, Jun 11, 2002 at 11:57:26PM -0400, Richard Guy Briggs wrote:
> On Tue, Jun 11, 2002 at 08:41:19PM -0700, David S. Miller wrote:
> > From: Bill Davidsen <[email protected]>
> > Date: Tue, 11 Jun 2002 18:41:16 -0400 (EDT)
> > Actually your arguments sound like you have a solution to your problem
> > and you want everyone to use it even if it doesn't help them. Have you
> > some emotional tie to SNMP, like being an author?
> Basically Bill, if you don't like this policy, fork the code. That is
> one of the strengths (and weaknesses) of open source. If your tree
> works better and gets wider use, then something about it must be better.
> If not, then maybe it wasn't. This community works on reputation
> capital (and some diplomacy).
To some degree (i.e. I know it is not intentional), this comes across as
blackmail.
Sorta like "if you want to play ball with me, you play by my rules,
otherwise you can go find your own diamond and your own friends to
play with."
I would still like to see David's logic as to why the approach is bad.
So far it amounts to 1) David doesn't like it, 2) David doesn't see a need
for it, or can see other less adequate methods of approximating the same
effect, and 3) David suspects that it will affect the performance of all
users to provide a limited gain for some applications.
'Reputation capital' is earned. I would like to see it 'earned' in
practice given a real requirement from a real developer on a real
world class application.
Statements such as 'the most important thing I do is say no', don't
convince me that a reputation is deserved. The extreme of this is
that an automaton could say no to everything.
Yes, I might be testing David... is it fair? Well, the requirement was
fair, so how could it not be fair?
I (and Bill) are just asking for some logic to show us where we are
wrong. Linus can pull the "I'm god, and that's that" gig. Fine. How
far does the godhead extend? Who else can pull this gig?
I am waiting in anticipation to be shown the error of my ways using
proper logic and iron clad reasoning... :-)
Cheers,
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
On Wed, 12 Jun 2002, Mark Mielke wrote:
> On Tue, Jun 11, 2002 at 11:57:26PM -0400, Richard Guy Briggs wrote:
> > On Tue, Jun 11, 2002 at 08:41:19PM -0700, David S. Miller wrote:
> > > From: Bill Davidsen <[email protected]>
> > > Date: Tue, 11 Jun 2002 18:41:16 -0400 (EDT)
> > > Actually your arguments sound like you have a solution to your problem
> > > and you want everyone to use it even if it doesn't help them. Have you
> > > some emotional tie to SNMP, like being an author?
> > Basically Bill, if you don't like this policy, fork the code. That is
> > one of the strengths (and weaknesses) of open source. If your tree
> > works better and gets wider use, then something about it must be better.
> > If not, then maybe it wasn't. This community works on reputation
> > capital (and some diplomacy).
>
> To some degree (i.e. I know it is not intentional), this comes across as
> blackmail.
>
> Sorta like "if you want to play ball with me, you play by my rules,
> otherwise you can go find your own diamond and your own friends to
> play with."
>
> I would still like to see David's logic as to why the approach is bad.
>
> So far it amounts to 1) David doesn't like it, 2) David doesn't see a need
> for it, or can see other less adequate methods of approximating the same
> effect, and 3) David suspects that it will affect the performance of all
> users to provide a limited gain for some applications.
Just to chime in my support (not that I don't think anyone needs it), I
think socket-based counters are An Extremely Bad Idea.
They might be useful in some rather specific debugging scenarios (like
100's of debugging printk's too!), but definitely not something we want to
be cluttering the main tree with, _especially_ before they've shown their
worth (doubtful).
--
Pekka Savola "Tell me of difficulties surmounted,
Netcore Oy not those you stumble over and fall"
Systems. Networks. Security. -- Robert Jordan: A Crown of Swords
Pekka Savola wrote:
> Just to chime in my support (not that I don't think anyone needs it), I
> think socket-based counters are An Extremely Bad Idea.
Several folks have produced arguments with details showing how they
can use the counters to better their product and/or debugging. Just
waving your hands and saying you don't like it does not invalidate
their claims. Please go back and read the thread and then, if you're
able, put forth some valid arguments for how to accomplish the goals
in some other manner, or show the negatives of including the feature.
If they are useful to some people, and have zero performance effect on others
(due to being a configurable kernel feature), then what is your
complaint?
I see two reasons left to dislike this feature:
1) General increase in #ifdef'd code. This actually seems like
a pretty good argument, but I haven't seen anyone mention it
specifically.
2) General dislike for a feature that one personally has no use for.
Seems to be Dave's main (professed) excuse.
Please add to this list, but back up your claims.
Thanks,
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Tue, 11 Jun 2002, Ben Greear wrote:
> If they are useful to some people, and have zero performance effect on others
> (due to being a configurable kernel feature), then what is your
> complaint?
3) Added features and complexity makes it more difficult to maintain the
kernel (you could say this is a variant of 1)
4) Patches that have only a little debugging/etc. value are probably
useful, but mainly for a specific set of people, and this would seem to be
best handled by external patches.
> 1) General increase in #ifdef'd code. This actually seems like
> a pretty good argument, but I haven't seen anyone mention it
> specifically.
Always implied from maintenance point-of-view.
--
Pekka Savola "Tell me of difficulties surmounted,
Netcore Oy not those you stumble over and fall"
Systems. Networks. Security. -- Robert Jordan: A Crown of Swords
Pekka Savola wrote:
> On Tue, 11 Jun 2002, Ben Greear wrote:
>
>>If they are useful to some people, and have zero performance effect on others
>>(due to being a configurable kernel feature), then what is your
>>complaint?
>>
>
> 3) Added features and complexity makes it more difficult to maintain the
> kernel (you could say this is a variant of 1)
Adding counters to structures generally is not going to increase
complexity (especially when you comment the code). It would increase
the code size slightly.
The code to bump the counters should also be extremely simple
(surely we don't drop packets in more than just a few places).
So, in this case, the increase in complexity seems pretty minimal.
> 4) Patches that have only a little debugging/etc. value are probably
> useful, but mainly for a specific set of people, and this would seem to be
> best handled by external patches.
External-only patches almost always rot, and are extremely hard to really
share across organizations. Still, point taken.
Thanks,
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Wed, Jun 12, 2002 at 01:20:04AM -0400, Mark Mielke wrote:
> On Tue, Jun 11, 2002 at 11:57:26PM -0400, Richard Guy Briggs wrote:
> > Basically Bill, if you don't like this policy, fork the code. That
> > is one of the strengths (and weaknesses) of open source. If your
> > tree works better and gets wider use, then something about it must
> > be better. If not, then maybe it wasn't. This community works on
> > reputation capital (and some diplomacy).
>
> To some degree (i.e. I know it is not intentional), this comes across
> as blackmail.
>
> Sorta like "if you want to play ball with me, you play by my rules,
> otherwise you can go find your own diamond and your own friends to
> play with."
>
> I would still like to see David's logic as to why the approach is bad.
Perhaps the reason David Miller is the network stack maintainer is that
he can see things that you can't see? The onus is on you (as proposer)
to show _him_ that a change is worthwhile, not the other way round.
Don't be confused into thinking this is some sort of democracy.
> So far it amounts to 1) David doesn't like it, 2) David doesn't see a
> need for it, or can see other less adequate methods of approximating
> the same effect, and 3) David suspects that it will affect the
> performance of all users to provide a limited gain for some
> applications.
Yup. What you've left out is:
4)David has demonstrated that he knows his stuff by high-quality work
over a long period of time
5)You haven't.
6)Because he is the network maintainer you need to convince him and you
haven't done so.
> 'Reputation capital' is earned. I would like to see it 'earned' in
> practice given a real requirement from a real developer on a real
> world class application.
>
> Statements such as 'the most important thing I do is say no', don't
> convince me that a reputation is deserved. The extreme of this is that
> an automaton could say no to everything.
Bollocks. It's plainly ridiculous to impugn David Miller and imply his
reputation isn't earned. You or I or your straw automaton could say
"no", but it doesn't mean anyone will listen.
Furthermore, I would propose that the Linux TCP/IP networking stack is "world
class" and has earned David Miller (and others) a fair bit of "reputation
capital", and that's why he (and not you or I) is where he is.
> Yes, I might be testing David... is it fair? Well, the requirement was
> fair, so how could it not be fair?
>
> I (and Bill) are just asking for some logic to show us where we are
> wrong. Linus can pull the "I'm god, and that's that" gig. Fine. How
> far does the godhead extend? Who else can pull this gig?
Show why it should be in the main tree and not just a debugging patch.
> I am waiting in anticipation to be shown the error of my ways using
> proper logic and iron clad reasoning... :-)
I think it's more like Dave M et al are waiting for someone to show the
slightest bit of evidence to say this isn't better off as just a
debugging patch rather than just waving hands and talking bollocks.
Sean
[Cc:'s stripped]
Ben Greear <[email protected]> said:
> Pekka Savola wrote:
> > Just to chime in my support (not that I don't think anyone needs it), I
> > think socket-based counters are An Extremely Bad Idea.
[...]
> If they are useful to some people, and have zero performance effect on others
> (due to being a configurable kernel feature), then what is your
> complaint?
That it adds code, which impacts _everybody_ futzing around in that area,
specially if it is a configurable option (this means multiplying the
possible configurations to be tested).
> I see two reasons left to dislike this feature:
>
> 1) General increase in #ifdef'd code. This actually seems like
> a pretty good argument, but I haven't seen anyone mention it
> specifically.
Right.
> 2) General dislike for a feature that one personally has no use for.
> Seems to be Dave's main (professed) excuse.
General dislike for adding features of _extremely_ limited (debugging!) use?
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513
At 08:11 AM 12/06/2002 -0400, Horst von Brand wrote:
>General dislike for adding features of _extremely_ limited (debugging!) use?
i would imagine that every installation of Squid on linux is interested in
having _realistic transaction logs_ of exactly how much data was received
and transmitted on a TCP connection.
i know of many many folk who use transaction logs from HTTP caches for
volume-based billing.
right now, those bills are anywhere between 10% to 25% incorrect.
you call that "extremely limited"?
of course, i am doing exactly what Dave said to do -- maintaining my own
out-of-kernel patch -- but its a pain, i'm sure it will soon conflict with
stuff and is a damn shame - it isn't much code, but Dave seems pretty
steadfast that he isn't interested.
damn shame that. i think the information is on par with
getsockopt(..,TCP_INFO,..) in terms of usefulness yet TCP_INFO is there in
the kernel.
cheers,
lincoln.
On Wed, 12 Jun 2002, Lincoln Dale wrote:
> At 08:11 AM 12/06/2002 -0400, Horst von Brand wrote:
> >General dislike for adding features of _extremely_ limited (debugging!) use?
>
> i would imagine that every installation of Squid on linux is interested in
> having _realistic transaction logs_ of exactly how much data was received
> and transmitted on a TCP connection.
>
> i know of many many folk who use transaction logs from HTTP caches for
> volume-based billing.
> right now, those bills are anywhere between 10% to 25% incorrect.
>
> you call that "extremely limited"?
>
Surely, you must have better ways to do accounting than this -- otherwise
you deserve to lose money.
>
> of course, i am doing exactly what Dave said to do -- maintaining my own
> out-of-kernel patch -- but its a pain, i'm sure it will soon conflict with
> stuff and is a damn shame - it isn't much code, but Dave seems pretty
> steadfast that he isn't interested.
>
You haven't proven why it's needed. And from the looks of it you don't even
need it. If 3 people need it, then i would like to ask we add lawn mower
support that my relatives have been asking for the last 5 years.
cheers,
jamal
At 08:33 AM 12/06/2002 -0400, jamal wrote:
> > i know of many many folk who use transaction logs from HTTP caches for
> > volume-based billing.
> > right now, those bills are anywhere between 10% to 25% incorrect.
> >
> > you call that "extremely limited"?
>
>Surely, you must have better ways to do accounting than this -- otherwise
>you deserve to lose money.
many people don't have better ways to do accounting than this.
in the case of Squid and Linux, they're typically using it because its
open-source and "free".
they want to use HTTP Caching to save bandwidth (and therefore save money),
but they also live in a regime of volume-based billing. (not everywhere on
the planet is fixed-$/month for DSL).
the unfortunate solution is to use HTTP Transaction logs, which count
payload at layer-7, not payload+headers+retransmissions at layer-3.
> > of course, i am doing exactly what Dave said to do -- maintaining my own
> > out-of-kernel patch -- but its a pain, i'm sure it will soon conflict with
> > stuff and is a damn shame - it isn't much code, but Dave seems pretty
> > steadfast that he isn't interested.
>
>You havent proven why its needed. And from the looks of it you dont even
>need it.
i don't need it because i already have it in my kernel.
but thats where it ends -- its destined to forever be a private patch.
>If 3 people need it, then i would like to ask we add lawn mower
>support that my relatives have been asking for the last 5 years.
lawn-mower support sounds like a userspace application to me.
cheers,
lincoln.
On Wed, 12 Jun 2002, Lincoln Dale wrote:
> At 08:33 AM 12/06/2002 -0400, jamal wrote:
> > > i know of many many folk who use transaction logs from HTTP caches for
> > > volume-based billing.
> > > right now, those bills are anywhere between 10% to 25% incorrect.
> > >
> > > you call that "extremely limited"?
> >
> >Surely, you must have better ways to do accounting than this -- otherwise
> >you deserve to lose money.
>
> many people don't have better ways to do accounting than this.
>
Then they don't care about losing money.
There's nothing _more important_ to a service provider than ability to do
proper billing. Otherwise, they are a charity organization.
> in the case of Squid and Linux, they're typically using it because its
> open-source and "free".
I am hoping you didnt mean to say squid was only good because it has
these perks.
>
> they want to use HTTP Caching to save bandwidth (and therefore save money),
> but they also live in a regime of volume-based billing. (not everywhere on
> the planet is fixed-$/month for DSL).
>
> the unfortunate solution is to use HTTP Transaction logs, which count
> payload at layer-7, not payload+headers+retransmissions at layer-3.
>
Look at your own employer's equipment if you want to do this right.
And then search around freshmeat so you dont reinvent the wheel.
> > > of course, i am doing exactly what Dave said to do -- maintaining my own
> > > out-of-kernel patch -- but its a pain, i'm sure it will soon conflict with
> > > stuff and is a damn shame - it isn't much code, but Dave seems pretty
> > > steadfast that he isn't interested.
> >
> >You havent proven why its needed. And from the looks of it you dont even
> >need it.
>
> i don't need it because i already have it in my kernel.
> but thats where it ends -- its destined to forever be a private patch.
>
And until you prove it is worth it and useful to other people then
forever that's where it belongs. I know of nobody serious about billing
who is using sockets stats as the transaction point.
> >If 3 people need it, then i would like to ask we add lawn mower
> >support that my relatives have been asking for the last 5 years.
>
> lawn-mower support sounds like a userspace application to me.
>
But we need a new system call support
cheers,
jamal
[ trimmed cc ]
On Wed, Jun 12, 2002 at 10:28:15PM +1000, Lincoln Dale wrote:
> right now, those bills are anywhere between 10% to 25% incorrect.
10-25% is roughly equivalent to the hit rates i've seen on my web caches
in real life, with MANY users. i can't believe people are actually
using this for data accounting.
i also can't think of many decent "free" (whatever your interpretation
of that is) ways to do it either. interface packet counters wrap
around, most commercial firewalls i've used have inaccurate or
incomplete logging (to put it lightly), and packet sniffers sometimes
can't keep up.
surely if profit (or just keeping your head above the water) is the goal
you can justify the necessary resources to use something like netflow, a
product designed to do exactly what you seem to want, among other
things. (search cisco.com, and no, i'm not a cisco employee)
fiddling the network stack so that you can do dubious hacks in an
allegedly apparently dubious aspect of squid just doesn't seem to be the
ideal way to fix this problem.
regards,
j.
--
toyota power: http://indigoid.net/
On Wed, Jun 12, 2002 at 09:00:08AM -0400, jamal wrote:
> On Wed, 12 Jun 2002, Lincoln Dale wrote:
> > At 08:33 AM 12/06/2002 -0400, jamal wrote:
> > >If 3 people need it, then i would like to ask we add lawn mower
> > >support that my relatives have been asking for the last 5 years.
> > lawn-mower support sounds like a userspace application to me.
> But we need a new system call support
This is another non-argument not dissimilar to the method of arguing that
David has used up to this point.
If lawn-mower support (whatever that is) is something that people
would use, then perhaps it *should* be added, even if it needs a new
system call. You have not shown a valid argument against your own
sarcastic suggestion, other than an implicit sneer.
There is no evidence that only three people would use a feature that
allows one to measure the exact bandwidth being used by a specific
TCP/IP connection (including retransmissions). There is evidence that
if such a patch was not accepted into the kernel that people that
desired this feature would either reinvent the wheel, because they
could not locate the private patch, likely doing it *wrong* because
they did not have wonderful people such as David to make strategic
suggestions regarding the exact implementation, or that they would
find other less adequate ways of doing something that approximates
what they actually need using existing functionality.
Anyways... I'll drop out of this one as my presence here was only
to try to encourage creativity, not to create any anger. I never
intended to slight David.
I would like to see stronger arguments presented when saying no to a
feature, as they allow me, and others around here, to learn. Cliches
don't teach me anything, and they make the speaker appear less
qualified. (Appearance may != Reality)
Good work David, and I look forward to seeing clearer objections.
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
On Thu, Jun 13, 2002 at 12:52:13AM +1000, john slee wrote:
> [ trimmed cc ]
> On Wed, Jun 12, 2002 at 10:28:15PM +1000, Lincoln Dale wrote:
> > right now, those bills are anywhere between 10% to 25% incorrect.
> ...
> surely if profit (or just keeping your head above the water) is the goal
> you can justify the necessary resources to use something like netflow, a
> product designed to do exactly what you seem to want, among other
> things. (search cisco.com, and no, i'm not a cisco employee)
"if <place goal here> is the goal, you can justify using something other
than Linux".
I like seeing Linux in the commercial space. This may be unreasonable of me.
mark
--
[email protected]/[email protected]/[email protected] __________________________
. . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder
|\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ |
| | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada
One ring to rule them all, one ring to find them, one ring to bring them all
and in the darkness bind them...
http://mark.mielke.cc/
On Wed, 12 Jun 2002, Mark Mielke wrote:
> On Wed, Jun 12, 2002 at 09:00:08AM -0400, jamal wrote:
> > On Wed, 12 Jun 2002, Lincoln Dale wrote:
> > > At 08:33 AM 12/06/2002 -0400, jamal wrote:
> > > >If 3 people need it, then i would like to ask we add lawn mower
> > > >support that my relatives have been asking for the last 5 years.
> > > lawn-mower support sounds like a userspace application to me.
> > But we need a new system call support
>
> This is another non-argument not dissimilar to the method of arguing that
> David has used up to this point.
>
> If lawn-mower support (whatever that is) is something that people
> would use, then perhaps it *should* be added, even if it needs a new
> system call. You have not shown a valid argument against your own
> sarcastic suggestion, other than an implicit sneer.
>
It was meant to be humorous. I am sure Lincoln meant it that way as well.
Next time i'll put a smiley. How about hairdryer support? ;->
cheers,
jamal
[CC:'s chopped down to the lists]
Mark Mielke <[email protected]>
> On Wed, Jun 12, 2002 at 09:00:08AM -0400, jamal wrote:
> > On Wed, 12 Jun 2002, Lincoln Dale wrote:
> > > At 08:33 AM 12/06/2002 -0400, jamal wrote:
> > > >If 3 people need it, then i would like to ask we add lawn mower
> > > >support that my relatives have been asking for the last 5 years.
> > > lawn-mower support sounds like a userspace application to me.
> > But we need a new system call support
>
> This is another non-argument not dissimilar to the method of arguing that
> David has used up to this point.
>
> If lawn-mower support (whatever that is) is something that people
> would use, then perhaps it *should* be added, even if it needs a new
> system call. You have not shown a valid argument against your own
> sarcastic suggestion, other than an implicit sneer.
Linux development has _always_ worked by:
1) You have a problem
2) You come up with a solution
3) Others use your patch, perhaps refine it
4) A discussion ensues on the worthiness of the patch
5) The community (or at least the halfgods in charge of keeping the Holy
Source ;-) sees that the patch is worthwhile, tested, and has enough
users
6) After some further cleanups and fixes the patch is accepted into the
kernel
7) The code is carried as part of the standard kernel, and updated with it
Being halfway through (2) or going on (3) and whining that _others_ do the
work to take care of finishing implementing a solution and then maintaining
it for you (jumping to (7)) won't get you anywhere. Guaranteed.
Perhaps your proposed solution is suboptimal.
Perhaps your problem is so outlandish that a solution has no place in the
standard kernel.
Perhaps solving the problem, even a common one, isn't worth the effort in
placing a solution in the kernel, and then maintaining it forever.
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513
At 12:52 AM 13/06/2002 +1000, john slee wrote:
>[ trimmed cc ]
>
>On Wed, Jun 12, 2002 at 10:28:15PM +1000, Lincoln Dale wrote:
> > right now, those bills are anywhere between 10% to 25% incorrect.
>
>10-25% is roughly equivalent to the hit rates i've seen on my web caches
>in real life, with MANY users. i can't believe people are actually
>using this for data accounting.
ho hum. we're straying from the topic here, but you've missed the point.
yes, a typical HTTP cache has a hit-rate around 30-35%.
any "margin of error" is on the basis of TCP retransmissions, IP+TCP header
overhead, ...
that wouldn't negate a hit-rate. rather, its 10-25% margin-of-error on the
30-35% hit-rate. ie. (0.1 x 0.35) or: 3.5% of the hit-rate may be in error.
but seeing as a HTTP cache is in essence gluing two TCP connections
together, what's to say that the client wouldn't have had the TCP
retransmissions in the first place? all i'm talking about is ensuring that
application "accounting" information is accurate.
right now, there is no method of doing that since the OS doesn't provide
that data.
>i also can't think of many decent "free" (whatever your interpretation
>of that is) ways to do it either. interface packet counters wrap
>around, most commercial firewalls i've used have inaccurate or
>incomplete logging (to put it lightly), and packet sniffers sometimes
>can't keep up.
>
>surely if profit (or just keeping your head above the water) is the goal
>you can justify the necessary resources to use something like netflow, a
>product designed to do exactly what you seem to want, among other
>things. (search cisco.com, and no, i'm not a cisco employee)
since we're well away from the real issue anyway ....
i know a thing or two about netflow and other accounting mechanisms that
cisco has. (i am a cisco employee even if this particular topic isn't
related to the work i do for them).
netflow counts statistics at layer2/3/4.
unfortunately, its rather hard to use netflow when a single stream goes to
two places.
e.g. an idle HTTP/1.1 persistent connection can be reused for a different
HTTP client. netflow will only show a single flow, but there may be
multiple HTTP requests from multiple (downstream) clients on that.
how about Pipelined Persistent connections?
what about streaming-media where a live stream is split to multiple clients?
>fiddling the network stack so that you can do dubious hacks in an
>allegedly apparently dubious aspect of squid just doesn't seem to be the
>ideal way to fix this problem.
since you obviously know lots about this topic, what "superior solutions"
do you have?
cheers,
lincoln.
PS. personally i don't care if squid has this or not - its been many many
years since i contributed any code to squid - but i do have a personal
interest in helping make linux a better OS.
>i know of many many folk who use transaction logs from HTTP caches for
>volume-based billing.
>right now, those bills are anywhere between 10% to 25% incorrect.
You are being paid to deliver packets to their destination, not to drop
them.
DS
> >i know of many many folk who use transaction logs from HTTP caches for
> >volume-based billing.
> >right now, those bills are anywhere between 10% to 25% incorrect.
>
> You are being paid to deliver packets to their destination, not
> to drop
>them.
ho hum.
yes, that is typically true of a transit provider.
however, the transit provider wants to charge the customer not just for
what is delivered to layer-7, but also for packetization overhead at
layer-2/layer-3/layer-4. after all, the IP+TCP packet headers are
delivered to the client as well, no?
this is just my point: there is NO method to account for those pesky headers!
also, think of the case where a HTTP Proxy _isn't_ in the path of
traffic. from this side of the Pacific, if there are TCP retransmissions,
they are end-to-end retransmissions, across that really expensive piece of
wet string otherwise known as an undersea cable. _that_ is the most
expensive hop and if a customer is congested on the last mile, they're
still eating into the expensive bandwidth!
discussions on the layer-8 (religion) or layer-9 (politics) aspects of
whether it is correct to bill based on that are irrelevant. what is
relevant is that there isn't any mechanism to count the overhead of
packetization or the overhead of using a "reliable stream transport" such
as TCP.
i do have the code to do this. its relatively trivial and consumes an
extra 8 bytes of RAM per socket.
it doesn't obfuscate the existing kernel code nor does it slow the code
down by any tangible amount.
it is a compile-time option so for those people who don't know or care
about it, it doesn't impact them at all.
yet, clearly, Dave and Jamal are vehemently opposed to it.
alas, that means it stays as an out-of-kernel patch and will likely
continue to suffer bit-rot as time goes by. c'est la vie.
cheers,
lincoln.
On Thu, 13 Jun 2002 18:44:20 +1000, Lincoln Dale wrote:
>>You are being paid to deliver packets to their destination, not
>>to drop
>>them.
>ho hum.
>yes, that is typically true of a transit provider.
>however, the transit provider wants to charge the customer not just for
>what is delivered to layer-7, but also for packetization overhead at
>layer-2/layer-3/layer-4. after all, the IP+TCP packet headers are
>delivered to the client as well, no?
Actually, the customer really only cares about payload. And if the overhead
is a constant ratio to the data, just adjust the rate and it all comes out in
the wash. The only reason to care is if you want to play games, such as
saying you charge the same rate 'per byte' as another provider when you
really don't.
>this is just my point: there is NO method to account for those pesky
>headers!
Nor is there any need to. Do you really need to differentially account for
customers who have more 'overhead bytes per data byte' than others? Or can
you just say it's 12% and raise/lower your rates 12%?
>also, think of the case where a HTTP Proxy _isn't_ in the path of
>traffic. from this side of the Pacific, if there are TCP retransmissions,
>they are end-to-end retransmissions, across that really expensive piece of
>wet string otherwise known as an undersea cable. _that_ is the most
>expensive hop and if a customer is congested on the last mile, they're
>still eating into the expensive bandwidth!
But accounting for the retransmissions won't help you, because you can't
tell retransmissions that the customer should rightly pay for versus
retransmissions that he shouldn't. So again, you can't do any better than to
just work it into the base price.
>discussions on the layer-8 (religion) or layer-9 (politics) aspects of
>whether it is correct to bill based on that is irrelevant. what is
>relevant is that there isn't any mechanism to count the overhead of
>packetization or the overhead of using a "reliable stream transport" such
>as TCP.
I realize that, but I don't see what good it is. So yes, it's something
that's currently hard to do, but if there's no legitimate need, then it
doesn't matter whether it's hard or not. You pose problems and you pose a
solution, but unless you can show that the solution actually solves the
problem, the solution is of no value.
>i do have the code to do this. its relatively trivial and consumes an
>extra 8 bytes of RAM per socket.
>it doesn't obfuscate the existing kernel code nor does it slow the code
>down by any tangible amount.
>it is a compile-time option so for those people who don't know or care
>about it, it doesn't impact them at all.
>yet, clearly, Dave and Jamal are vehemently opposed to it.
>alas, that means it stays as an out-of-kernel patch and will likely
>continue to suffer bit-rot as time goes by. c'est la vie.
Well, show them a real-world problem that it solves. Show a case where you
can't bill fairly without it and you can bill fairly with it. ... If you can.
DS
Lincoln Dale <[email protected]> said:
[...]
> but seeing as a HTTP cache is in essence glueing two TCP connections
> togethers, whats to say that the client wouldn't have had the TCP
> retransmissions in the first place? all i'm talking about is ensuring that
> application "accounting" information is accurate.
Strange charging model, if you ask me. If "somebody" hogs the connection,
causing my data streams to need retransmissions, I get charged. If I happen
to surf to <http://www.some.exciting.site.org> as the first customer today,
I get charged, everybody else gets it for free off the cache.
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513
At 09:00 AM 12/06/2002 -0400, jamal wrote:
> > > > i know of many many folk who use transaction logs from HTTP caches for
> > > > volume-based billing.
> > > > right now, those bills are anywhere between 10% to 25% incorrect.
> > > >
> > > > you call that "extremely limited"?
> > >
> > >Surely, you must have better ways to do accounting than this -- otherwise
> > >you deserve to lose money.
> >
> > many people don't have better ways to do accounting than this.
>
>Then they don't care about losing money.
>There's nothing _more important_ to a service provider than ability to do
>proper billing. Otherwise, they are a charity organization.
on this side of the planet (Australia), just about *all* service-providers
offer differentiated-billing based on a volume-usage basis.
that includes Worldcom, Telstra, Optus (SingTel), connect.com.au (AAPT).
some of these differentiate themselves by using caching to provide faster
access and/or mitigate the latency overhead of simplex satellite.
this has been ongoing for many many many years now. please just accept
that HTTP caching is almost a necessity with the pricing models in use!
>There's nothing _more important_ to a service provider than ability to do
>proper billing. Otherwise, they are a charity organization.
we're almost talking about the same thing here -- and this is my point! i
agree that it is important - hence why i've added a getsockopt() option to
provide octet counters from the ip+tcp level!
> > in the case of Squid and Linux, they're typically using it because its
> > open-source and "free".
>
>I am hoping you didnt mean to say squid was only good because it has
>these perks.
not at all. they're using it because it meets their requirements.
once again, this is not a discussion about religion or politics!
> > they want to use HTTP Caching to save bandwidth (and therefore save money),
> > but they also live in a regime of volume-based billing. (not everywhere on
> > the planet is fixed-$/month for DSL).
> >
> > the unfortunate solution is to use HTTP Transaction logs, which count
> > payload at layer-7, not payload+headers+retransmissions at layer-3.
>
>Look at your own employers eqpt if you want to do this right.
>And then search around freshmeat so you dont reinvent the wheel.
once again, i respectfully disagree. while there are numerous technologies
for accounting out there (e.g. netflow), they all break down when you have
things like HTTP Persistent connections which may share a single
[server-side] connection with multiple [client-side] connections.
>And until you prove it is worth it and useful to other people then
>forever that's where it belongs. I know of nobody serious about billing
>who is using sockets stats as the transaction point.
you live in a country where the billing regime is different.
> > lawn-mower support sounds like a userspace application to me.
>
>But we need a new system call support
(yes, i did take that comment as humorous before :-)).
if what i was proposing involved a new system-call then i agree that there
would be significant pushback. what i have is a new getsockopt()
option. ie. in reality, no worse than getsockopt(..,TCP_INFO).
cheers,
lincoln.
It sounds like what you want is socket accounting which works like
process accounting. I.e when a socket lifetime ends, put out a record
with number of packets/bytes sent/received.
On Thu, 2002-06-13 at 17:24, Lincoln Dale wrote:
> At 09:00 AM 12/06/2002 -0400, jamal wrote:
> > > > > i know of many many folk who use transaction logs from HTTP caches for
> > > > > volume-based billing.
> > > > > right now, those bills are anywhere between 10% to 25% incorrect.
> > > > >
> > > > > you call that "extremely limited"?
> > > >
> > > >Surely, you must have better ways to do accounting than this -- otherwise
> > > >you deserve to lose money.
> > >
> > > many people don't have better ways to do accounting than this.
> >
> >Then they don't care about losing money.
> >There's nothing _more important_ to a service provider than ability to do
> >proper billing. Otherwise, they are a charity organization.
>
> on this side of the planet (Australia), just about *all* service-providers
> offer differentiated-billing based on a volume-usage basis.
> that includes Worldcom, Telstra, Optus (SingTel), connect.com.au (AAPT).
> some of these differentiate themselves by using caching to provide faster
> access and/or mitigate the latency overhead of simplex satellite.
> this has been ongoing for many many many years now. please just accept
> that HTTP caching is almost a necessity with the pricing models in use!
>
> >There's nothing _more important_ to a service provider than ability to do
> >proper billing. Otherwise, they are a charity organization.
>
> we're almost talking about the same thing here -- and this is my point! i
> agree that it is important - hence why i've added a getsockopt() option to
> provide octet counters from the ip+tcp level!
>
> > > in the case of Squid and Linux, they're typically using it because its
> > > open-source and "free".
> >
> >I am hoping you didnt mean to say squid was only good because it has
> >these perks.
>
> not at all. they're using it because it meets their requirements.
> once again, this is not a discussion about religion or politics!
>
> > > they want to use HTTP Caching to save bandwidth (and therefore save money),
> > > but they also live in a regime of volume-based billing. (not everywhere on
> > > the planet is fixed-$/month for DSL).
> > >
> > > the unfortunate solution is to use HTTP Transaction logs, which count
> > > payload at layer-7, not payload+headers+retransmissions at layer-3.
> >
> >Look at your own employers eqpt if you want to do this right.
> >And then search around freshmeat so you dont reinvent the wheel.
>
> once again, i respectfully disagree. while there are numerous technologies
> for accounting out there (e.g. netflow), they all break down when you have
> things like HTTP Persistent connections which may share a single
> [server-side] connection with multiple [client-side] connections.
>
> >And until you prove it is worth it and useful to other people then
> >forever that's where it belongs. I know of nobody serious about billing
> >who is using sockets stats as the transaction point.
>
> you live in a country where the billing regime is different.
>
> > > lawn-mower support sounds like a userspace application to me.
> >
> >But we need a new system call support
>
> (yes, i did take that comment as humorous before :-)).
>
> if what i was proposing involved a new system-call then i agree that there
> would be significant pushback. what i have is a new getsockopt()
> option. ie. in reality, no worse than getsockopt(..,TCP_INFO).
>
>
> cheers,
>
> lincoln.
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
Stephen Hemminger wrote:
> It sounds like what you want is socket accounting which works like
> process accounting. I.e when a socket lifetime ends, put out a record
> with number of packets/bytes sent/received.
Runtime is much more interesting to me. However, if you are keeping
enough information to do the accounting as you suggest, then it would
be trivial to make it available incrementally over the life of the
socket.
Billing is not the only interesting aspect of this. It is also good for
any program trying to dynamically tune or understand the lower-level
characteristics of a particular routing path or interface.
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear