On PPC architecture with phy->rev == 1, machine checks occur during
initialization of the "Extended G PHY registers". This problem was
also seen on bcm43xx-softmac, and was fixed by conditionally skipping
over certain reads/writes of these registers. The same solution has been
applied here with testing by David Woodhouse. Note: These modifications
are not found in the specifications, but are needed for PPC.
Signed-off-by: Larry Finger <[email protected]>
---
John and Michael,
This patch will apply to both the wireless-dev and the mb trees.
Larry
bcm43xx_phy.c | 50 +++++++++++++++++++++++++++++---------------------
1 file changed, 29 insertions(+), 21 deletions(-)
Index: wireless-dev/drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c
===================================================================
--- wireless-dev.orig/drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c
+++ wireless-dev/drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c
@@ -1362,7 +1362,7 @@ static void bcm43xx_phy_initb6(struct bc
static void bcm43xx_calc_loopback_gain(struct bcm43xx_wldev *dev)
{
struct bcm43xx_phy *phy = &dev->phy;
- u16 backup_phy[16];
+ u16 backup_phy[16] = {0};
u16 backup_radio[3];
u16 backup_bband;
u16 i, j, loop_i_max;
@@ -1373,8 +1373,10 @@ static void bcm43xx_calc_loopback_gain(s
backup_phy[1] = bcm43xx_phy_read(dev, BCM43xx_PHY_CCKBBANDCFG);
backup_phy[2] = bcm43xx_phy_read(dev, BCM43xx_PHY_RFOVER);
backup_phy[3] = bcm43xx_phy_read(dev, BCM43xx_PHY_RFOVERVAL);
- backup_phy[4] = bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER);
- backup_phy[5] = bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL);
+ if (phy->rev != 1) {
+ backup_phy[4] = bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER);
+ backup_phy[5] = bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL);
+ }
backup_phy[6] = bcm43xx_phy_read(dev, BCM43xx_PHY_BASE(0x5A));
backup_phy[7] = bcm43xx_phy_read(dev, BCM43xx_PHY_BASE(0x59));
backup_phy[8] = bcm43xx_phy_read(dev, BCM43xx_PHY_BASE(0x58));
@@ -1402,14 +1404,16 @@ static void bcm43xx_calc_loopback_gain(s
bcm43xx_phy_read(dev, BCM43xx_PHY_RFOVER) | 0x0001);
bcm43xx_phy_write(dev, BCM43xx_PHY_RFOVERVAL,
bcm43xx_phy_read(dev, BCM43xx_PHY_RFOVERVAL) & 0xFFFE);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0001);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFE);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0002);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFD);
+ if (phy->rev != 1) {
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0001);
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFE);
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0002);
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFD);
+ }
bcm43xx_phy_write(dev, BCM43xx_PHY_RFOVER,
bcm43xx_phy_read(dev, BCM43xx_PHY_RFOVER) | 0x000C);
bcm43xx_phy_write(dev, BCM43xx_PHY_RFOVER,
@@ -1426,10 +1430,12 @@ static void bcm43xx_calc_loopback_gain(s
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x0A),
bcm43xx_phy_read(dev, BCM43xx_PHY_BASE(0x0A)) | 0x2000);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0004);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
- bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFB);
+ if (phy->rev != 1) {
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVER) | 0x0004);
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL,
+ bcm43xx_phy_read(dev, BCM43xx_PHY_ANALOGOVERVAL) & 0xFFFB);
+ }
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x03),
(bcm43xx_phy_read(dev, BCM43xx_PHY_BASE(0x03))
& 0xFF9F) | 0x40);
@@ -1522,8 +1528,10 @@ exit_loop1:
trsw_rx = 0x18;
exit_loop2:
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER, backup_phy[4]);
- bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL, backup_phy[5]);
+ if (phy->rev != 1) {
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVER, backup_phy[4]);
+ bcm43xx_phy_write(dev, BCM43xx_PHY_ANALOGOVERVAL, backup_phy[5]);
+ }
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x5A), backup_phy[6]);
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x59), backup_phy[7]);
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x58), backup_phy[8]);
@@ -1576,7 +1584,7 @@ static void bcm43xx_phy_initg(struct bcm
bcm43xx_phy_write(dev, BCM43xx_PHY_RFOVER, 0x400);
bcm43xx_phy_write(dev, BCM43xx_PHY_PGACTL, 0xC0);
}
- if (phy->gmode) {
+ if (phy->gmode && phy->rev >= 2) {
tmp = bcm43xx_phy_read(dev, BCM43xx_PHY_VERSION_OFDM);
tmp &= BCM43xx_PHYVER_VERSION;
if (tmp == 3 || tmp == 5) {
@@ -1635,7 +1643,7 @@ static void bcm43xx_phy_initg(struct bcm
else
bcm43xx_phy_write(dev, BCM43xx_PHY_BASE(0x2F), 0x202);
}
- if (phy->gmode) {
+ if (phy->gmode && phy->rev != 1) {
bcm43xx_lo_adjust(dev);
bcm43xx_phy_write(dev, BCM43xx_PHY_LO_MASK, 0x8078);
}
@@ -1649,8 +1657,8 @@ static void bcm43xx_phy_initg(struct bcm
*/
bcm43xx_nrssi_hw_update(dev, 0xFFFF);//FIXME?
bcm43xx_calc_nrssi_threshold(dev);
- } else {
- if (phy->gmode && phy->nrssi[0] == -1000) {
+ } else if (phy->gmode && phy->rev != 1) {
+ if (phy->nrssi[0] == -1000) {
assert(phy->nrssi[1] == -1000);
bcm43xx_calc_nrssi_slope(dev);
} else
On Fri, 2007-04-13 at 10:06 -0500, Larry Finger wrote:
> I look forward to your rewrite of the setup and init routines. Perhaps they will even let my 4311
> work with mac80211, but I will be very surprised if the PPC machine check problem goes away.
With phy rev == 1, the gmode bit is assumed unset when initialising a G
PHY. Maybe that is the crucial difference. David will send me some
backtraces of this problem, once he does I'll look at it again.
johannes
John,
In case you cannot find it, here is a copy of David's patch.
Larry
From David Woodhouse.
It's very simple... if we use inw instead of readw for the faulting I/O
access, then the machine check gets trapped. We can do this because on
PPC, PCI I/O is actually memory-mapped. The CPU doesn't have a separate
I/O space and instructions. So we just compensate for the address at
which PCI I/O is mapped, and abuse inw().
Then we make bcm43xx_phy_read() print the register it tried to access
whenever it gets 0xFFFF, and stick a WARN_ON(1) in the machine check
handler when it detects a machine check caused by an I/O access...
--- ../linux-2.6.20.ppc64/./drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c 2007-04-14
21:51:02.000000000 +0100
+++ ./drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c 2007-04-16 01:39:44.000000000 +0100
@@ -269,10 +270,13 @@ static inline u16 adjust_phyreg_for_phyt
u16 bcm43xx_phy_read(struct bcm43xx_wldev *dev, u16 offset)
{
struct bcm43xx_phy *phy = &dev->phy;
-
+ uint16_t foo;
offset = adjust_phyreg_for_phytype(phy, offset);
bcm43xx_write16(dev, BCM43xx_MMIO_PHY_CONTROL, offset);
- return bcm43xx_read16(dev, BCM43xx_MMIO_PHY_DATA);
+ foo = bcm43xx_read16(dev, BCM43xx_MMIO_PHY_DATA);
+ if (foo == 0xffff)
+ printk(KERN_DEBUG "Read phy reg %x; got 0xFFFF.\n", offset);
+ return foo;
}
void bcm43xx_phy_write(struct bcm43xx_wldev *dev, u16 offset, u16 val)
--- ../linux-2.6.20.ppc64/./drivers/ssb/pci.c 2007-04-14 21:51:02.000000000 +0100
+++ ./drivers/ssb/pci.c 2007-04-15 23:44:27.000000000 +0100
@@ -475,6 +475,7 @@ static u16 ssb_pci_read16(struct ssb_dev
if (unlikely(ssb_pci_switch_core(bus, dev)))
return 0xFFFF;
}
+ return inw(bus->mmio + offset - isa_io_base);
return readw(bus->mmio + offset);
}
--- ../linux-2.6.20.ppc64/./arch/powerpc/kernel/traps.c 2007-04-14 21:50:40.000000000 +0100
+++ ./arch/powerpc/kernel/traps.c 2007-04-15 23:45:48.000000000 +0100
@@ -343,8 +343,10 @@ void machine_check_exception(struct pt_r
return;
}
- if (check_io_access(regs))
+ if (check_io_access(regs)) {
+ WARN_ON(1);
return;
+ }
#if defined(CONFIG_4xx) && !defined(CONFIG_440A)
if (reason & ESR_IMCP) {
--
dwmw2
_______________________________________________
Bcm43xx-dev mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/bcm43xx-dev
John W. Linville wrote:
> On Wed, Apr 11, 2007 at 11:08:53AM -0500, Larry Finger wrote:
>> On PPC architecture with phy->rev == 1, machine checks occur during
>> initialization of the "Extended G PHY registers". This problem was
>> also seen on bcm43xx-softmac, and was fixed by conditionally skipping
>> over certain reads/writes of these registers. The same solution has been
>> applied here with testing by David Woodhouse. Note: These modifications
>> are not found in the specifications, but are needed for PPC.
>
> I added this patch to the Fedora rawhide kernels, but our Fedora QA
> lead reports that he still has this crash:
>
> http://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=233011
>
> I had diagnosed this to be the same crash as this patch was supposed
> to address. Was I in error?
>
> He also reports that the problem still occurs on my FC7 test kernels,
> which are very close to the current rawhide kernels but with the most
> recent round of wireless-dev updates added.
I suspect the machine check is from the same kind of problem; however, the soft lockup is different.
It is always possible that it changes the behavior. There is also the different program flow in
mac80211. I just put in the ones that failed in softmac.
Your tester needs to get a copy of David's hack that prints out the address of the offending
register. That is the only way to tell what is happening. Once we know the address, then it will be
a matter of getting printk's into the code to tell which one is failing.
Larry
Stefano Brivio wrote:
> On Wed, 11 Apr 2007 11:08:53 -0500
> Larry Finger <[email protected]> wrote:
>
>> On PPC architecture with phy->rev == 1, machine checks occur during
>> initialization of the "Extended G PHY registers". This problem was
>> also seen on bcm43xx-softmac, and was fixed by conditionally skipping
>> over certain reads/writes of these registers. The same solution has been
>> applied here with testing by David Woodhouse. Note: These modifications
>> are not found in the specifications, but are needed for PPC.
>
> I don't think this patch has to be reverted, but I still think that it's
> better to notice the reverse engineers team about failing operations
> and understand the real problem rather than hiding it. Which works in some
> cases, but sometimes just hide other bugs.
>
> I'm almost done with rewriting the whole A/G/N setup and init routines,
> following the new v4 specs. This issue could even be related.
I agree that it would be better if we could follow the specs and not have the machine check problem.
My impression is that early versions of the bcm43xx driver did not have this difficulty, but that it
appeared after a relatively recent change in the G init specs. BTW, these were the changes that
greatly improved performance, particularly on the phy->rev = 8 devices. Perhaps the Broadcom
software abandoned/ignored the phy->rev = 1 units running on PPC hardware, which seems to be the
only combination that shows the problem. I don't know about MIPS hardware, but the problem is not
seen on x86 architecture.
I look forward to your rewrite of the setup and init routines. Perhaps they will even let my 4311
work with mac80211, but I will be very surprised if the PPC machine check problem goes away.
Larry
On Mon, 2007-04-16 at 01:06 +0100, David Woodhouse wrote:
> On Fri, 2007-04-13 at 17:17 +0200, Johannes Berg wrote:
> > With phy rev == 1, the gmode bit is assumed unset when initialising a
> > G PHY. Maybe that is the crucial difference. David will send me some
> > backtraces of this problem, once he does I'll look at it again.
>
> http://david.woodhou.se/bcm43xx-mac80211.dmesg-3.txt
>
> The module itself is at http://david.woodhou.se/bcm43xx-mac80211.ko so
> you can find line numbers where they're not immediately obvious.
That was without Larry's latest patch applied. If I apply Larry's patch,
we get fewer machine checks...
http://david.woodhou.se/bcm43xx-mac80211-lf.dmesg.txt
http://david.woodhou.se/bcm43xx-mac80211-lf.ko
Strange, I thought that when I'd tested Larry's patch I was left with
_no_ machine checks. Perhaps at that point I still had a printk on every
PHY read or write and the machine checks were lost in the noise.
--
dwmw2
On Thu, 2007-04-12 at 20:09 -0400, John W. Linville wrote:
> I added this patch to the Fedora rawhide kernels, but our Fedora QA
> lead reports that he still has this crash:
>
> http://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=233011
>
> I had diagnosed this to be the same crash as this patch was supposed
> to address. Was I in error?
I also observed the machine check with the current mb branch this
morning, although I didn't have a chance to capture it, so I wanted to
reported it later when I would have the details.
I was testing the patch on a G4 PowerMac. I also have a G3 PowerMac
where I tested older versions, and I intend to test the current code on
it as well.
> Any thoughts?
Apparently more changes are needed. I don't think the latest patch
needs to be reverted.
--
Regards,
Pavel Roskin
On Wed, 11 Apr 2007 11:08:53 -0500
Larry Finger <[email protected]> wrote:
> On PPC architecture with phy->rev == 1, machine checks occur during
> initialization of the "Extended G PHY registers". This problem was
> also seen on bcm43xx-softmac, and was fixed by conditionally skipping
> over certain reads/writes of these registers. The same solution has been
> applied here with testing by David Woodhouse. Note: These modifications
> are not found in the specifications, but are needed for PPC.
I don't think this patch has to be reverted, but I still think that it's
better to notice the reverse engineers team about failing operations
and understand the real problem rather than hiding it. Which works in some
cases, but sometimes just hide other bugs.
I'm almost done with rewriting the whole A/G/N setup and init routines,
following the new v4 specs. This issue could even be related.
--
Ciao
Stefano
On Thu, 2007-04-12 at 20:22 -0500, Larry Finger wrote:
> Your tester needs to get a copy of David's hack that prints out the address of the offending
> register. That is the only way to tell what is happening. Once we know the address, then it will be
> a matter of getting printk's into the code to tell which one is failing.
It's very simple... if we use inw instead of readw for the faulting I/O
access, then the machine check gets trapped. We can do this because on
PPC, PCI I/O is actually memory-mapped. The CPU doesn't have a separate
I/O space and instructions. So we just compensate for the address at
which PCI I/O is mapped, and abuse inw().
Then we make bcm43xx_phy_read() print the register it tried to access
whenever it gets 0xFFFF, and stick a WARN_ON(1) in the machine check
handler when it detects a machine check caused by an I/O access...
--- ../linux-2.6.20.ppc64/./drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c 2007-04-14 21:51:02.000000000 +0100
+++ ./drivers/net/wireless/mac80211/bcm43xx/bcm43xx_phy.c 2007-04-16 01:39:44.000000000 +0100
@@ -269,10 +270,13 @@ static inline u16 adjust_phyreg_for_phyt
u16 bcm43xx_phy_read(struct bcm43xx_wldev *dev, u16 offset)
{
struct bcm43xx_phy *phy = &dev->phy;
-
+ uint16_t foo;
offset = adjust_phyreg_for_phytype(phy, offset);
bcm43xx_write16(dev, BCM43xx_MMIO_PHY_CONTROL, offset);
- return bcm43xx_read16(dev, BCM43xx_MMIO_PHY_DATA);
+ foo = bcm43xx_read16(dev, BCM43xx_MMIO_PHY_DATA);
+ if (foo == 0xffff)
+ printk(KERN_DEBUG "Read phy reg %x; got 0xFFFF.\n", offset);
+ return foo;
}
void bcm43xx_phy_write(struct bcm43xx_wldev *dev, u16 offset, u16 val)
--- ../linux-2.6.20.ppc64/./drivers/ssb/pci.c 2007-04-14 21:51:02.000000000 +0100
+++ ./drivers/ssb/pci.c 2007-04-15 23:44:27.000000000 +0100
@@ -475,6 +475,7 @@ static u16 ssb_pci_read16(struct ssb_dev
if (unlikely(ssb_pci_switch_core(bus, dev)))
return 0xFFFF;
}
+ return inw(bus->mmio + offset - isa_io_base);
return readw(bus->mmio + offset);
}
--- ../linux-2.6.20.ppc64/./arch/powerpc/kernel/traps.c 2007-04-14 21:50:40.000000000 +0100
+++ ./arch/powerpc/kernel/traps.c 2007-04-15 23:45:48.000000000 +0100
@@ -343,8 +343,10 @@ void machine_check_exception(struct pt_r
return;
}
- if (check_io_access(regs))
+ if (check_io_access(regs)) {
+ WARN_ON(1);
return;
+ }
#if defined(CONFIG_4xx) && !defined(CONFIG_440A)
if (reason & ESR_IMCP) {
--
dwmw2
On Wednesday 11 April 2007 18:08, Larry Finger wrote:
> On PPC architecture with phy->rev == 1, machine checks occur during
> initialization of the "Extended G PHY registers". This problem was
> also seen on bcm43xx-softmac, and was fixed by conditionally skipping
> over certain reads/writes of these registers. The same solution has been
> applied here with testing by David Woodhouse. Note: These modifications
> are not found in the specifications, but are needed for PPC.
>
> Signed-off-by: Larry Finger <[email protected]>
> ---
>
> John and Michael,
>
> This patch will apply to both the wireless-dev and the mb trees.
Applied to my tree, thanks.
http://bu3sch.de/gitweb?p=wireless-dev.git;a=commitdiff;h=e25588a08a1b7e0d500c30dd0ccee76d318fe72d
--
Greetings Michael.
On Fri, 2007-04-13 at 17:17 +0200, Johannes Berg wrote:
> With phy rev == 1, the gmode bit is assumed unset when initialising a
> G PHY. Maybe that is the crucial difference. David will send me some
> backtraces of this problem, once he does I'll look at it again.
http://david.woodhou.se/bcm43xx-mac80211.dmesg-3.txt
The module itself is at http://david.woodhou.se/bcm43xx-mac80211.ko so
you can find line numbers where they're not immediately obvious.
--
dwmw2
On Wed, Apr 11, 2007 at 11:08:53AM -0500, Larry Finger wrote:
> On PPC architecture with phy->rev == 1, machine checks occur during
> initialization of the "Extended G PHY registers". This problem was
> also seen on bcm43xx-softmac, and was fixed by conditionally skipping
> over certain reads/writes of these registers. The same solution has been
> applied here with testing by David Woodhouse. Note: These modifications
> are not found in the specifications, but are needed for PPC.
I added this patch to the Fedora rawhide kernels, but our Fedora QA
lead reports that he still has this crash:
http://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=233011
I had diagnosed this to be the same crash as this patch was supposed
to address. Was I in error?
He also reports that the problem still occurs on my FC7 test kernels,
which are very close to the current rawhide kernels but with the most
recent round of wireless-dev updates added.
http://people.redhat.com/linville/kernels/fc7/
Any thoughts?
John
--
John W. Linville
[email protected]