2008-10-10 16:53:44

by Alex Bennee

[permalink] [raw]
Subject: rtl8169_get_stats seems to be adding random crap to rx_missed_errors count

Hi,

I've been seeing odd behaviour in the ifconfig output for my rtl8169
based network device. The ifconfig output had a very high number of
dropped packets in the display which changed massively every time I
checked. I tracked it down to /proc/net/dev and, after a few test patches,
it seems the rx_missed field is the culprit. I did a test patch of the
form:

+++ b/drivers/net/r8169.c
@@ -3062,10 +3062,16 @@ static struct net_device_stats
*rtl8169_get_stats(struct net_device *dev)
unsigned long flags;

if (netif_running(dev)) {
+ u32 missed;
spin_lock_irqsave(&tp->lock, flags);
- dev->stats.rx_missed_errors += RTL_R32(RxMissed);
+ missed = RTL_R32(RxMissed);
RTL_W32(RxMissed, 0);
spin_unlock_irqrestore(&tp->lock, flags);
+
+ dev->stats.rx_missed_errors += missed;
+ if (missed)
+ printk(KERN_INFO "rtl8169_get_stats added %d
to missed count\n", missed);
+
}

return &dev->stats;

And lo and behold I see this behaviour:

[ 946.160188] rtl8169_get_stats added 250834943 to missed count
[ 946.160354] rtl8169_get_stats added 20768 to missed count
[ 946.160411] rtl8169_get_stats added 7032 to missed count
[ 946.160425] rtl8169_get_stats added 1677 to missed count
[ 946.160439] rtl8169_get_stats added 1577 to missed count
[ 946.160452] rtl8169_get_stats added 1543 to missed count
[ 946.160465] rtl8169_get_stats added 1517 to missed count
[ 946.160479] rtl8169_get_stats added 1572 to missed count
[ 946.160492] rtl8169_get_stats added 1586 to missed count
[ 946.160504] rtl8169_get_stats added 1284 to missed count
[ 948.164206] rtl8169_get_stats added 250864594 to missed count
[ 948.164374] rtl8169_get_stats added 21004 to missed count
[ 948.164434] rtl8169_get_stats added 7385 to missed count
[ 948.164447] rtl8169_get_stats added 1631 to missed count
[ 948.164461] rtl8169_get_stats added 1574 to missed count
[ 948.164475] rtl8169_get_stats added 1645 to missed count
[ 948.164489] rtl8169_get_stats added 1583 to missed count
[ 948.164503] rtl8169_get_stats added 1561 to missed count
[ 948.164523] rtl8169_get_stats added 2314 to missed count
[ 948.164534] rtl8169_get_stats added 1300 to missed count
[ 950.168182] rtl8169_get_stats added 250857857 to missed count
[ 950.168345] rtl8169_get_stats added 20465 to missed count
[ 950.168401] rtl8169_get_stats added 6903 to missed count
[ 950.168416] rtl8169_get_stats added 1711 to missed count
[ 950.168430] rtl8169_get_stats added 1585 to missed count
[ 950.168443] rtl8169_get_stats added 1532 to missed count
[ 950.168456] rtl8169_get_stats added 1520 to missed count
[ 950.168469] rtl8169_get_stats added 1539 to missed count
[ 950.168483] rtl8169_get_stats added 1588 to missed count
[ 950.168494] rtl8169_get_stats added 1282 to missed count

It looks very much like the number being read is a decreasing counter
rather than anything else.

Has anyone got a copy of the data sheet and can confirm that:

RxMissed = 0x4c,

is the correct offset for the register and it doesn't need any special
handling to be read?




--
Alex, homepage: http://www.bennee.com/~alex/



--
Alex, homepage: http://www.bennee.com/~alex/


2008-10-10 21:35:40

by Bruno Prémont

[permalink] [raw]
Subject: Re: rtl8169_get_stats seems to be adding random crap to rx_missed_errors count

Hi Alex,

This issue has already been reported in
http://lkml.org/lkml/2008/10/9/235
and a patch exists.

Could you verify that patch #0009 at URL below fixes the issue for you:
http://userweb.kernel.org/~romieu/r8169/2.6.27-rc9/20081007/


Bruno


On Fri, 10 October 2008 "Alex Bennee" <[email protected]> wrote:
> Hi,
>
> I've been seeing odd behaviour in the ifconfig output for my rtl8169
> based network device. The ifconfig output had a very high number of
> dropped packets in the display which changed massively every time I
> checked. I tracked it down to /proc/net/dev and the a few test patches
> it seems the rc_missed field is the culprit. I did a test patch of the
> form:
>
> +++ b/drivers/net/r8169.c
> @@ -3062,10 +3062,16 @@ static struct net_device_stats
> *rtl8169_get_stats(struct net_device *dev)
> unsigned long flags;
>
> if (netif_running(dev)) {
> + u32 missed;
> spin_lock_irqsave(&tp->lock, flags);
> - dev->stats.rx_missed_errors += RTL_R32(RxMissed);
> + missed = RTL_R32(RxMissed);
> RTL_W32(RxMissed, 0);
> spin_unlock_irqrestore(&tp->lock, flags);
> +
> + dev->stats.rx_missed_errors += missed;
> + if (missed)
> + printk(KERN_INFO "rtl8169_get_stats added %d
> to missed count\n", missed);
> +
> }
>
> return &dev->stats;
>
> And low and behold I see this behaviour:
>
> [ 946.160188] rtl8169_get_stats added 250834943 to missed count
> [ 946.160354] rtl8169_get_stats added 20768 to missed count
> [ 946.160411] rtl8169_get_stats added 7032 to missed count
> [ 946.160425] rtl8169_get_stats added 1677 to missed count
> [ 946.160439] rtl8169_get_stats added 1577 to missed count
> [ 946.160452] rtl8169_get_stats added 1543 to missed count
> [ 946.160465] rtl8169_get_stats added 1517 to missed count
> [ 946.160479] rtl8169_get_stats added 1572 to missed count
> [ 946.160492] rtl8169_get_stats added 1586 to missed count
> [ 946.160504] rtl8169_get_stats added 1284 to missed count
> [ 948.164206] rtl8169_get_stats added 250864594 to missed count
> [ 948.164374] rtl8169_get_stats added 21004 to missed count
> [ 948.164434] rtl8169_get_stats added 7385 to missed count
> [ 948.164447] rtl8169_get_stats added 1631 to missed count
> [ 948.164461] rtl8169_get_stats added 1574 to missed count
> [ 948.164475] rtl8169_get_stats added 1645 to missed count
> [ 948.164489] rtl8169_get_stats added 1583 to missed count
> [ 948.164503] rtl8169_get_stats added 1561 to missed count
> [ 948.164523] rtl8169_get_stats added 2314 to missed count
> [ 948.164534] rtl8169_get_stats added 1300 to missed count
> [ 950.168182] rtl8169_get_stats added 250857857 to missed count
> [ 950.168345] rtl8169_get_stats added 20465 to missed count
> [ 950.168401] rtl8169_get_stats added 6903 to missed count
> [ 950.168416] rtl8169_get_stats added 1711 to missed count
> [ 950.168430] rtl8169_get_stats added 1585 to missed count
> [ 950.168443] rtl8169_get_stats added 1532 to missed count
> [ 950.168456] rtl8169_get_stats added 1520 to missed count
> [ 950.168469] rtl8169_get_stats added 1539 to missed count
> [ 950.168483] rtl8169_get_stats added 1588 to missed count
> [ 950.168494] rtl8169_get_stats added 1282 to missed count
>
> It looks very much like the number being read is a decreasing counter
> rather than anything else.
>
> Has anyone got a copy of the data sheet and can confirm that:
>
> RxMissed = 0x4c,
>
> is the correct offset for the register and it doesn't need any special
> handling to be read?
>
>
>
>
> --
> Alex, homepage: http://www.bennee.com/~alex/

2008-10-11 07:02:48

by Alex Bennee

[permalink] [raw]
Subject: Re: rtl8169_get_stats seems to be adding random crap to rx_missed_errors count

On Fri, Oct 10, 2008 at 10:27 PM, Bruno Prémont
<[email protected]> wrote:
> Hi Alex,
>
> This issue has already been reported in
> http://lkml.org/lkml/2008/10/9/235
> and a patch exists.
>
> Could you verify that patch #0009 at URL below fixes the issue for you:
> http://userweb.kernel.org/~romieu/r8169/2.6.27-rc9/20081007/

I can confirm that patch works for me. I have further reviewed the
code and it looks OK to me from here. Should I just forward the patch
and tag it to the mailing list or is it now in a tree that gets picked
up?

--
Alex, homepage: http://www.bennee.com/~alex/

2008-10-11 08:49:23

by Alex Bennee

[permalink] [raw]
Subject: Re: rtl8169_get_stats seems to be adding random crap to rx_missed_errors count

On Sat, Oct 11, 2008 at 7:42 AM, Alex Bennee <[email protected]> wrote:
> On Fri, Oct 10, 2008 at 10:27 PM, Bruno Prémont
> <[email protected]> wrote:
>> Hi Alex,
>>
>> This issue has already been reported in
>> http://lkml.org/lkml/2008/10/9/235
>> and a patch exists.
>>
>> Could you verify that patch #0009 at URL below fixes the issue for you:
>> http://userweb.kernel.org/~romieu/r8169/2.6.27-rc9/20081007/
>
> I can confirm that patch works for me. I have further reviewed the
> code and it looks OK to me from here. Should I just forward the patch
> and tag it to the mailing list or is it now in a tree that gets picked
> up?


From 96b79ad3d9e8c15a0118810e91f0e28ce5575493 Mon Sep 17 00:00:00 2001
From: Francois Romieu <[email protected]>
Date: Wed, 10 Sep 2008 22:28:56 +0200
Subject: [PATCH] r8169: fix RxMissed register access

- the register is defined for the 8169 chipset only and there is
no 8169 beyond RTL_GIGA_MAC_VER_06.
- only the lower 3 bytes of the register are valid

Fixes:
1. http://bugzilla.kernel.org/show_bug.cgi?id=10180
2. http://bugzilla.kernel.org/show_bug.cgi?id=11062 (bits of)

Tested by Hermann Gausterer and Adam Huffman.

Signed-off-by: Francois Romieu <[email protected]>
Cc: Edward Hsu <[email protected]>
Signed-off-by: Jeff Garzik <[email protected]>
Tested-by: Alex Bennee <[email protected]>
---
drivers/net/r8169.c | 25 ++++++++++++++-----------
1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 0f6f974..4190ee7 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2099,8 +2099,6 @@ static void rtl_hw_start_8168(struct net_device *dev)

RTL_R8(IntrMask);

- RTL_W32(RxMissed, 0);
-
rtl_set_rx_mode(dev);

RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
@@ -2143,8 +2141,6 @@ static void rtl_hw_start_8101(struct net_device *dev)

RTL_R8(IntrMask);

- RTL_W32(RxMissed, 0);
-
rtl_set_rx_mode(dev);

RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
@@ -2922,6 +2918,17 @@ static int rtl8169_poll(struct napi_struct
*napi, int budget)
return work_done;
}

+static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+{
+ struct rtl8169_private *tp = netdev_priv(dev);
+
+ if (tp->mac_version > RTL_GIGA_MAC_VER_06)
+ return;
+
+ dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
+ RTL_W32(RxMissed, 0);
+}
+
static void rtl8169_down(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -2939,9 +2946,7 @@ core_down:

rtl8169_asic_down(ioaddr);

- /* Update the error counts. */
- dev->stats.rx_missed_errors += RTL_R32(RxMissed);
- RTL_W32(RxMissed, 0);
+ rtl8169_rx_missed(dev, ioaddr);

spin_unlock_irq(&tp->lock);

@@ -3063,8 +3068,7 @@ static struct net_device_stats
*rtl8169_get_stats(struct net_device *dev)

if (netif_running(dev)) {
spin_lock_irqsave(&tp->lock, flags);
- dev->stats.rx_missed_errors += RTL_R32(RxMissed);
- RTL_W32(RxMissed, 0);
+ rtl8169_rx_missed(dev, ioaddr);
spin_unlock_irqrestore(&tp->lock, flags);
}

@@ -3089,8 +3093,7 @@ static int rtl8169_suspend(struct pci_dev *pdev,
pm_message_t state)

rtl8169_asic_down(ioaddr);

- dev->stats.rx_missed_errors += RTL_R32(RxMissed);
- RTL_W32(RxMissed, 0);
+ rtl8169_rx_missed(dev, ioaddr);

spin_unlock_irq(&tp->lock);

--
1.5.6.4

2008-10-11 16:52:13

by Francois Romieu

[permalink] [raw]
Subject: Re: rtl8169_get_stats seems to be adding random crap to rx_missed_errors count

Alex Bennee <[email protected]> :
[...]

The fix is in davem's net-next tree. David sent a pull request
yesterday, so the fix ought to be in 2.6.28.

Thanks for your testing.

--
Ueimor