LinuxLists.cc - [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

2024-01-16 19:48:53

Subject: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

From: Heiner Kallweit <[email protected]>

[ Upstream commit 055dd7511f675d26fa283b35bb3dadfc7f77ed97 ]

Mirsad proposed a patch to reduce the number of spinlock lock/unlock
operations and the function code size. This can be further improved
because the function sets a consecutive register block.

Suggested-by: Mirsad Todorovac <[email protected]>
Signed-off-by: Heiner Kallweit <[email protected]>
Reviewed-by: Simon Horman <[email protected]>
Reviewed-by: Mirsad Todorovac <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
---
drivers/net/ethernet/realtek/r8169_main.c | 139 +++++-----------------
1 file changed, 28 insertions(+), 111 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 81fd31f6fac4..1664ca8fa14f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3106,6 +3106,33 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168g_2);
}

+static void rtl8411b_fix_phy_down(struct rtl8169_private *tp)
+{
+ static const u16 fix_data[] = {
+/* 0xf800 */ 0xe008, 0xe00a, 0xe00c, 0xe00e, 0xe027, 0xe04f, 0xe05e, 0xe065,
+/* 0xf810 */ 0xc602, 0xbe00, 0x0000, 0xc502, 0xbd00, 0x074c, 0xc302, 0xbb00,
+/* 0xf820 */ 0x080a, 0x6420, 0x48c2, 0x8c20, 0xc516, 0x64a4, 0x49c0, 0xf009,
+/* 0xf830 */ 0x74a2, 0x8ca5, 0x74a0, 0xc50e, 0x9ca2, 0x1c11, 0x9ca0, 0xe006,
+/* 0xf840 */ 0x74f8, 0x48c4, 0x8cf8, 0xc404, 0xbc00, 0xc403, 0xbc00, 0x0bf2,
+/* 0xf850 */ 0x0c0a, 0xe434, 0xd3c0, 0x49d9, 0xf01f, 0xc526, 0x64a5, 0x1400,
+/* 0xf860 */ 0xf007, 0x0c01, 0x8ca5, 0x1c15, 0xc51b, 0x9ca0, 0xe013, 0xc519,
+/* 0xf870 */ 0x74a0, 0x48c4, 0x8ca0, 0xc516, 0x74a4, 0x48c8, 0x48ca, 0x9ca4,
+/* 0xf880 */ 0xc512, 0x1b00, 0x9ba0, 0x1b1c, 0x483f, 0x9ba2, 0x1b04, 0xc508,
+/* 0xf890 */ 0x9ba0, 0xc505, 0xbd00, 0xc502, 0xbd00, 0x0300, 0x051e, 0xe434,
+/* 0xf8a0 */ 0xe018, 0xe092, 0xde20, 0xd3c0, 0xc50f, 0x76a4, 0x49e3, 0xf007,
+/* 0xf8b0 */ 0x49c0, 0xf103, 0xc607, 0xbe00, 0xc606, 0xbe00, 0xc602, 0xbe00,
+/* 0xf8c0 */ 0x0c4c, 0x0c28, 0x0c2c, 0xdc00, 0xc707, 0x1d00, 0x8de2, 0x48c1,
+/* 0xf8d0 */ 0xc502, 0xbd00, 0x00aa, 0xe0c0, 0xc502, 0xbd00, 0x0132
+ };
+ unsigned long flags;
+ int i;
+
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ for (i = 0; i < ARRAY_SIZE(fix_data); i++)
+ __r8168_mac_ocp_write(tp, 0xf800 + 2 * i, fix_data[i]);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+}
+
static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8411_2[] = {
@@ -3139,117 +3166,7 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
mdelay(3);
r8168_mac_ocp_write(tp, 0xFC26, 0x0000);

- r8168_mac_ocp_write(tp, 0xF800, 0xE008);
- r8168_mac_ocp_write(tp, 0xF802, 0xE00A);
- r8168_mac_ocp_write(tp, 0xF804, 0xE00C);
- r8168_mac_ocp_write(tp, 0xF806, 0xE00E);
- r8168_mac_ocp_write(tp, 0xF808, 0xE027);
- r8168_mac_ocp_write(tp, 0xF80A, 0xE04F);
- r8168_mac_ocp_write(tp, 0xF80C, 0xE05E);
- r8168_mac_ocp_write(tp, 0xF80E, 0xE065);
- r8168_mac_ocp_write(tp, 0xF810, 0xC602);
- r8168_mac_ocp_write(tp, 0xF812, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF814, 0x0000);
- r8168_mac_ocp_write(tp, 0xF816, 0xC502);
- r8168_mac_ocp_write(tp, 0xF818, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF81A, 0x074C);
- r8168_mac_ocp_write(tp, 0xF81C, 0xC302);
- r8168_mac_ocp_write(tp, 0xF81E, 0xBB00);
- r8168_mac_ocp_write(tp, 0xF820, 0x080A);
- r8168_mac_ocp_write(tp, 0xF822, 0x6420);
- r8168_mac_ocp_write(tp, 0xF824, 0x48C2);
- r8168_mac_ocp_write(tp, 0xF826, 0x8C20);
- r8168_mac_ocp_write(tp, 0xF828, 0xC516);
- r8168_mac_ocp_write(tp, 0xF82A, 0x64A4);
- r8168_mac_ocp_write(tp, 0xF82C, 0x49C0);
- r8168_mac_ocp_write(tp, 0xF82E, 0xF009);
- r8168_mac_ocp_write(tp, 0xF830, 0x74A2);
- r8168_mac_ocp_write(tp, 0xF832, 0x8CA5);
- r8168_mac_ocp_write(tp, 0xF834, 0x74A0);
- r8168_mac_ocp_write(tp, 0xF836, 0xC50E);
- r8168_mac_ocp_write(tp, 0xF838, 0x9CA2);
- r8168_mac_ocp_write(tp, 0xF83A, 0x1C11);
- r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0);
- r8168_mac_ocp_write(tp, 0xF83E, 0xE006);
- r8168_mac_ocp_write(tp, 0xF840, 0x74F8);
- r8168_mac_ocp_write(tp, 0xF842, 0x48C4);
- r8168_mac_ocp_write(tp, 0xF844, 0x8CF8);
- r8168_mac_ocp_write(tp, 0xF846, 0xC404);
- r8168_mac_ocp_write(tp, 0xF848, 0xBC00);
- r8168_mac_ocp_write(tp, 0xF84A, 0xC403);
- r8168_mac_ocp_write(tp, 0xF84C, 0xBC00);
- r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2);
- r8168_mac_ocp_write(tp, 0xF850, 0x0C0A);
- r8168_mac_ocp_write(tp, 0xF852, 0xE434);
- r8168_mac_ocp_write(tp, 0xF854, 0xD3C0);
- r8168_mac_ocp_write(tp, 0xF856, 0x49D9);
- r8168_mac_ocp_write(tp, 0xF858, 0xF01F);
- r8168_mac_ocp_write(tp, 0xF85A, 0xC526);
- r8168_mac_ocp_write(tp, 0xF85C, 0x64A5);
- r8168_mac_ocp_write(tp, 0xF85E, 0x1400);
- r8168_mac_ocp_write(tp, 0xF860, 0xF007);
- r8168_mac_ocp_write(tp, 0xF862, 0x0C01);
- r8168_mac_ocp_write(tp, 0xF864, 0x8CA5);
- r8168_mac_ocp_write(tp, 0xF866, 0x1C15);
- r8168_mac_ocp_write(tp, 0xF868, 0xC51B);
- r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0);
- r8168_mac_ocp_write(tp, 0xF86C, 0xE013);
- r8168_mac_ocp_write(tp, 0xF86E, 0xC519);
- r8168_mac_ocp_write(tp, 0xF870, 0x74A0);
- r8168_mac_ocp_write(tp, 0xF872, 0x48C4);
- r8168_mac_ocp_write(tp, 0xF874, 0x8CA0);
- r8168_mac_ocp_write(tp, 0xF876, 0xC516);
- r8168_mac_ocp_write(tp, 0xF878, 0x74A4);
- r8168_mac_ocp_write(tp, 0xF87A, 0x48C8);
- r8168_mac_ocp_write(tp, 0xF87C, 0x48CA);
- r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4);
- r8168_mac_ocp_write(tp, 0xF880, 0xC512);
- r8168_mac_ocp_write(tp, 0xF882, 0x1B00);
- r8168_mac_ocp_write(tp, 0xF884, 0x9BA0);
- r8168_mac_ocp_write(tp, 0xF886, 0x1B1C);
- r8168_mac_ocp_write(tp, 0xF888, 0x483F);
- r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2);
- r8168_mac_ocp_write(tp, 0xF88C, 0x1B04);
- r8168_mac_ocp_write(tp, 0xF88E, 0xC508);
- r8168_mac_ocp_write(tp, 0xF890, 0x9BA0);
- r8168_mac_ocp_write(tp, 0xF892, 0xC505);
- r8168_mac_ocp_write(tp, 0xF894, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF896, 0xC502);
- r8168_mac_ocp_write(tp, 0xF898, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF89A, 0x0300);
- r8168_mac_ocp_write(tp, 0xF89C, 0x051E);
- r8168_mac_ocp_write(tp, 0xF89E, 0xE434);
- r8168_mac_ocp_write(tp, 0xF8A0, 0xE018);
- r8168_mac_ocp_write(tp, 0xF8A2, 0xE092);
- r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20);
- r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0);
- r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F);
- r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4);
- r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3);
- r8168_mac_ocp_write(tp, 0xF8AE, 0xF007);
- r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0);
- r8168_mac_ocp_write(tp, 0xF8B2, 0xF103);
- r8168_mac_ocp_write(tp, 0xF8B4, 0xC607);
- r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8B8, 0xC606);
- r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8BC, 0xC602);
- r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00);
- r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C);
- r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28);
- r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C);
- r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00);
- r8168_mac_ocp_write(tp, 0xF8C8, 0xC707);
- r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00);
- r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2);
- r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1);
- r8168_mac_ocp_write(tp, 0xF8D0, 0xC502);
- r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA);
- r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0);
- r8168_mac_ocp_write(tp, 0xF8D8, 0xC502);
- r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00);
- r8168_mac_ocp_write(tp, 0xF8DC, 0x0132);
+ rtl8411b_fix_phy_down(tp);

r8168_mac_ocp_write(tp, 0xFC26, 0x8000);

--
2.43.0

2024-01-17 01:43:34

by Jakub Kicinski

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

On Tue, 16 Jan 2024 14:38:47 -0500 Sasha Levin wrote:
> Mirsad proposed a patch to reduce the number of spinlock lock/unlock
> operations and the function code size. This can be further improved
> because the function sets a consecutive register block.

Clearly a noop and a lot of LoC changed. I vote to drop this from
the backport.

2024-01-17 10:32:29

by Mirsad Todorovac

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

On 1/17/24 02:43, Jakub Kicinski wrote:
> On Tue, 16 Jan 2024 14:38:47 -0500 Sasha Levin wrote:
>> Mirsad proposed a patch to reduce the number of spinlock lock/unlock
>> operations and the function code size. This can be further improved
>> because the function sets a consecutive register block.
>
> Clearly a noop and a lot of LoC changed. I vote to drop this from
> the backport.

Dear Jakub,

I will not argue with a senior developer, but please let me plead for the
cause.

There are a couple of issues here:

1. Heiner's patch generates smaller and faster code, with 100+ fewer
spin_lock_irqsave()/spin_unlock_irqrestore() pairs.

According to this table:

[1] https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook-1c.2023.06.11a.pdf#table.3.1

The cost of a single lock can be 15.4 - 101.9 ns (for the example CPU),
so the total savings would be 1709 - 11310 ns. But as a PHY power-down
event is not frequent, this might indeed be an insignificant saving.

2. Why had I advocated atomic programming of the RTL registers in the
first place?

The mac_ocp_lock was introduced recently:

commit 91c8643578a21e435c412ffbe902bb4b4773e262
Author: Heiner Kallweit <[email protected]>
Date: Mon Mar 6 22:23:15 2023 +0100

r8169: use spinlock to protect mac ocp register access

For disabling ASPM during NAPI poll we'll have to access mac ocp
registers in atomic context. This could result in races because
a mac ocp read consists of a write to register OCPDR, followed
by a read from the same register. Therefore add a spinlock to
protect access to mac ocp registers.

Reviewed-by: Simon Horman <[email protected]>
Tested-by: Kai-Heng Feng <[email protected]>
Tested-by: Holger Hoffstätte <[email protected]>
Signed-off-by: Heiner Kallweit <[email protected]>
Signed-off-by: David S. Miller <[email protected]>

Well, the answer is in the question - the very need for protecting the access
to RTL_W(8|16|32) with locks comes from the fact that something was accessing
the RTL card asynchronously.

Forgive me if this is a stupid question ...

Now - do we have a guarantee that the card will not be used asynchronously
half-programmed from something else in that case, leading to another spurious
lockup?

IMHO, shouldn't the entire reprogramming of PHY down recovery of the RTL 8411b
be done atomically, under a single spin_lock_irqsave()/spin_unlock_irqrestore()
pair?

Best regards,
Mirsad Todorovac

--
CARNet system engineer
Faculty of Graphic Arts | Academy of Fine Arts
University of Zagreb

CARNet sistem inženjer
Grafički fakultet | Akademija likovnih umjetnosti
Sveučilište u Zagrebu

2024-01-17 11:10:21

by Heiner Kallweit

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

On 17.01.2024 11:30, Mirsad Todorovac wrote:
> On 1/17/24 02:43, Jakub Kicinski wrote:
>> On Tue, 16 Jan 2024 14:38:47 -0500 Sasha Levin wrote:
>>> Mirsad proposed a patch to reduce the number of spinlock lock/unlock
>>> operations and the function code size. This can be further improved
>>> because the function sets a consecutive register block.
>>
>> Clearly a noop and a lot of LoC changed. I vote to drop this from
>> the backport.
>
> Dear Jakub,
>
> I will not argue with a senior developer, but please let me plead for the
> cause.
>
> There are a couple of issues here:
>
> 1. Heiner's patch generates smaller and faster code, with 100+
> spin_lock_irqsave()/spin_unlock_restore() pairs less.
>
> According to this table:
>
> [1] https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook-1c.2023.06.11a.pdf#table.3.1
>
> The cost of single lock can be 15.4 - 101.9 ns (for the example CPU),
> so total savings would be 1709 - 11310 ns. But as the event of PHY power
> down is not frequent, this might be a insignificant saving indeed.
>
> 2. Why I had advertised atomic programming of RTL registers in the first
> place?
>
> The mac_ocp_lock was introduced recently:
>
> commit 91c8643578a21e435c412ffbe902bb4b4773e262
> Author: Heiner Kallweit <[email protected]>
> Date:   Mon Mar 6 22:23:15 2023 +0100
>
>     r8169: use spinlock to protect mac ocp register access
>
>     For disabling ASPM during NAPI poll we'll have to access mac ocp
>     registers in atomic context. This could result in races because
>     a mac ocp read consists of a write to register OCPDR, followed
>     by a read from the same register. Therefore add a spinlock to
>     protect access to mac ocp registers.
>
>     Reviewed-by: Simon Horman <[email protected]>
>     Tested-by: Kai-Heng Feng <[email protected]>
>     Tested-by: Holger Hoffstätte <[email protected]>
>     Signed-off-by: Heiner Kallweit <[email protected]>
>     Signed-off-by: David S. Miller <[email protected]>
>
> Well, the answer is in the question - the very need for protecting the access
> to RTL_W(8|16|32) with locks comes from the fact that something was accessing
> the RTL card asynchronously.
>
> Forgive me if this is a stupid question ...
>
> Now - do we have a guarantee that the card will not be used asynchronously
> half-programmed from something else in that case, leading to another spurious
> lockup?
>
> IMHO, shouldn't the entire reprogramming of PHY down recovery of the RTL 8411b
> be done atomically, under a single spin_lock_irqsave()/spin_unlock_irqrestore()
> pair?
>

There's no actual issue that requires fixing. It's an improvement.

> Best regards,
> Mirsad Todorovac
>

2024-01-17 13:45:17

by Andrew Lunn

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

On Wed, Jan 17, 2024 at 11:30:53AM +0100, Mirsad Todorovac wrote:
> On 1/17/24 02:43, Jakub Kicinski wrote:
> > On Tue, 16 Jan 2024 14:38:47 -0500 Sasha Levin wrote:
> > > Mirsad proposed a patch to reduce the number of spinlock lock/unlock
> > > operations and the function code size. This can be further improved
> > > because the function sets a consecutive register block.
> >
> > Clearly a noop and a lot of LoC changed. I vote to drop this from
> > the backport.
>
> Dear Jakub,
>
> I will not argue with a senior developer, but please let me plead for the
> cause.
>
> There are a couple of issues here:
>
> 1. Heiner's patch generates smaller and faster code, with 100+
> spin_lock_irqsave()/spin_unlock_restore() pairs less.
>
> According to this table:
>
> [1] https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook-1c.2023.06.11a.pdf#table.3.1
>
> The cost of single lock can be 15.4 - 101.9 ns (for the example CPU),
> so total savings would be 1709 - 11310 ns. But as the event of PHY power
> down is not frequent, this might be a insignificant saving indeed.
>
> 2. Why I had advertised atomic programming of RTL registers in the first
> place?
>
> The mac_ocp_lock was introduced recently:
>
> commit 91c8643578a21e435c412ffbe902bb4b4773e262
> Author: Heiner Kallweit <[email protected]>
> Date: Mon Mar 6 22:23:15 2023 +0100
>
> r8169: use spinlock to protect mac ocp register access
>
> For disabling ASPM during NAPI poll we'll have to access mac ocp
> registers in atomic context. This could result in races because
> a mac ocp read consists of a write to register OCPDR, followed
> by a read from the same register. Therefore add a spinlock to
> protect access to mac ocp registers.
>
> Reviewed-by: Simon Horman <[email protected]>
> Tested-by: Kai-Heng Feng <[email protected]>
> Tested-by: Holger Hoffstätte <[email protected]>
> Signed-off-by: Heiner Kallweit <[email protected]>
> Signed-off-by: David S. Miller <[email protected]>
>
> Well, the answer is in the question - the very need for protecting the access
> to RTL_W(8|16|32) with locks comes from the fact that something was accessing
> the RTL card asynchronously.
>
> Forgive me if this is a stupid question ...
>
> Now - do we have a guarantee that the card will not be used asynchronously
> half-programmed from something else in that case, leading to another spurious
> lockup?
>
> IMHO, shouldn't the entire reprogramming of PHY down recovery of the RTL 8411b
> be done atomically, under a single spin_lock_irqsave()/spin_unlock_irqrestore()
> pair?

Hi Mirsad

Please take a read of:

https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html

Do you think this patch fulfils these criteria? In particular, "It
must either fix a real bug that bothers people...".

I agree with Heiner; this appears to be just an optimisation.

Andrew

2024-01-17 16:36:43

by Mirsad Todorovac

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

On 1/17/24 14:44, Andrew Lunn wrote:
> On Wed, Jan 17, 2024 at 11:30:53AM +0100, Mirsad Todorovac wrote:
>> On 1/17/24 02:43, Jakub Kicinski wrote:
>>> On Tue, 16 Jan 2024 14:38:47 -0500 Sasha Levin wrote:
>>>> Mirsad proposed a patch to reduce the number of spinlock lock/unlock
>>>> operations and the function code size. This can be further improved
>>>> because the function sets a consecutive register block.
>>>
>>> Clearly a noop and a lot of LoC changed. I vote to drop this from
>>> the backport.
>>
>> Dear Jakub,
>>
>> I will not argue with a senior developer, but please let me plead for the
>> cause.
>>
>> There are a couple of issues here:
>>
>> 1. Heiner's patch generates smaller and faster code, with 100+
>> spin_lock_irqsave()/spin_unlock_restore() pairs less.
>>
>> According to this table:
>>
>> [1] https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook-1c.2023.06.11a.pdf#table.3.1
>>
>> The cost of single lock can be 15.4 - 101.9 ns (for the example CPU),
>> so total savings would be 1709 - 11310 ns. But as the event of PHY power
>> down is not frequent, this might be a insignificant saving indeed.
>>
>> 2. Why I had advertised atomic programming of RTL registers in the first
>> place?
>>
>> The mac_ocp_lock was introduced recently:
>>
>> commit 91c8643578a21e435c412ffbe902bb4b4773e262
>> Author: Heiner Kallweit <[email protected]>
>> Date: Mon Mar 6 22:23:15 2023 +0100
>>
>> r8169: use spinlock to protect mac ocp register access
>>
>> For disabling ASPM during NAPI poll we'll have to access mac ocp
>> registers in atomic context. This could result in races because
>> a mac ocp read consists of a write to register OCPDR, followed
>> by a read from the same register. Therefore add a spinlock to
>> protect access to mac ocp registers.
>>
>> Reviewed-by: Simon Horman <[email protected]>
>> Tested-by: Kai-Heng Feng <[email protected]>
>> Tested-by: Holger Hoffstätte <[email protected]>
>> Signed-off-by: Heiner Kallweit <[email protected]>
>> Signed-off-by: David S. Miller <[email protected]>
>>
>> Well, the answer is in the question - the very need for protecting the access
>> to RTL_W(8|16|32) with locks comes from the fact that something was accessing
>> the RTL card asynchronously.
>>
>> Forgive me if this is a stupid question ...
>>
>> Now - do we have a guarantee that the card will not be used asynchronously
>> half-programmed from something else in that case, leading to another spurious
>> lockup?
>>
>> IMHO, shouldn't the entire reprogramming of PHY down recovery of the RTL 8411b
>> be done atomically, under a single spin_lock_irqsave()/spin_unlock_irqrestore()
>> pair?
>
> Hi Mirsad
>
> Please take a read of:
>
> https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
>
> Do you think this patch fulfils these criteria? In particularly, "It
> must either fix a real bug that bothers people...".
>
> I agree with Heiner, this appears to be just an optimisation,

Hi Andrew,

Yes, I wasn't aware of the 100-line limit, and yes, this is not a bug fix,
but an improvement (optimisation).

With this, I think I can join the consensus: this patch is not a candidate
for backporting. :-/

However, I am concerned about the possibility of two kernel threads accessing
the RTL NIC in an interleaved fashion, each attempting to program it via the
RTL_(R|W)(8|16|32) macros (which are expanded to readl/writel and assembly).

AFAICS, nothing prevents two (or more) threads from deciding, in an unlikely
but possible case, to program the card at the same time. (Do we have a guard
lock against this?)

mac_ocp_lock appears to be acquired and released for each RTL_(R|W)(8|16|32),
with the exception of r8168_mac_ocp_modify().

To be true to the facts, each byte will go to the right port due to address/data
pairs used in each call - however, I am worried whether there could be a scenario
like this:

CPU 1                                           CPU 2

start programming NIC
programming NIC
(preempted in spin_lock_irqsave()
                                                start programming NIC
                                                programming NIC
                                                programming NIC
                                                programming NIC
                                                preempted in spin_lock_irqsave()
(resume control in spin_unlock_irqrestore()
programming NIC
programming NIC
(preempted in spin_lock_irqsave()
                                                continue programming NIC
                                                programming NIC
                                                programming NIC
                                                end programming NIC
(resume control in spin_unlock_irqrestore()
programming NIC
end programming NIC

Now, every byte, word or longword will come to the right place, thanks to
RTL_(R|W)(8|16|32) having the address/data pairs - but I worry that this
jumping from sequence to sequence might confuse the NIC.

I mean, if those latches behind the addresses cause some physical effect, maybe
the ORDER is also important, not just that every byte, word or longword comes
to the right address?

r8168_mac_ocp_read()/r8168_mac_ocp_write() guarantee that every piece of
data will end up being read or written at the right address, OK. But this
does not seem to guarantee the SEQUENTIAL ORDER of the programming.

I mean, if we are dealing with physical hardware like a NIC, the order
of (especially writing) data might be crucial. 8-/

Am I making any sense?

Are we algorithmically guaranteed that two threads will never attempt to
write data to the NIC hardware registers at the same time?

Thanks.

Best regards,
Mirsad Todorovac

> Andrew

--
CARNet system engineer
Faculty of Graphic Arts | Academy of Fine Arts
University of Zagreb

CARNet sistem inženjer
Grafički fakultet | Akademija likovnih umjetnosti
Sveučilište u Zagrebu

2024-01-17 17:04:28

by Andrew Lunn

[permalink] [raw]

Subject: Re: [PATCH AUTOSEL 6.7 021/108] r8169: improve RTL8411b phy-down fixup

> Hi Andrew,
>
> Yes, I wasn't aware of the 100 lines limit, and yes, this is not a bug fix,
> but an improvement (optimisation).
>
> I think by this I can join to consensus, this patch is not a candidate for
> backporting. :-/
>
> However, I am concerned about the possibility of two kernel threads accessing
> the RTL NIC intermittently attempting to program the NIC over the RTL_(R|W)(8|16|32)
> instructions (which are expanded to readl/writel and assembly).

Most calls into the driver are protected by the RTNL lock. There are a
few exceptions. probe() obviously does not hold RTNL. Actually sending
packets, and interrupt handlers don't hold RTNL.

Please look at the code and see if you can see any paths which might
do parallel access without holding RTNL.

You could also do some testing. Add ASSERT_RTNL() in the code you are
worried about. If the lock is not held, you will get a stack trace.

Andrew