2023-03-22 19:59:15

by Gregg Wonderly

[permalink] [raw]
Subject: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

I am receiving a console error message from this driver that appears to be in the following function. In this function, the chk_dbg variable is 32bits and there is logic that seems to attempt to select from 1 of 2 different 32bit values to get a 64bit wide mask value into chk_dbg from dma_dbg_4 or dmc_dbg_5.

The problem is that the (5*i) shift count should be have i adjusted by the 6 limit used to make the check for which dma_dbg_[45] value selected.


static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
{
u32 dma_dbg_4, dma_dbg_5, dma_dbg_6, chk_dbg;
u8 dcu_chain_state, dcu_complete_state;
bool dcu_wait_frdone = false;
unsigned long chk_dcu = 0;
unsigned int i = 0;
dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
dma_dbg_5 = REG_READ(ah, AR_DMADBG_5);
dma_dbg_6 = REG_READ(ah, AR_DMADBG_6);
dcu_complete_state = dma_dbg_6 & 0x3;
if (dcu_complete_state != 0x1)
goto exit;
for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
if (i < 6)
chk_dbg = dma_dbg_4;
else
chk_dbg = dma_dbg_5;
dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
if (dcu_chain_state == 0x6) {
dcu_wait_frdone = true;
chk_dcu |= BIT(i);
}
}
if ((dcu_complete_state == 0x1) && dcu_wait_frdone) {
for_each_set_bit(i, &chk_dcu, ATH9K_NUM_TX_QUEUES) {
if (ath9k_hw_verify_hang(ah, i))
return true;
}
}
exit:
return false;
}

The for loop seems to need to look like the following:

for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
int off=i;
if (i < 6) {
chk_dbg = dma_dbg_4;
} else {
chk_dbg = dma_dbg_5;
off = i - 6;
}
dcu_chain_state = (chk_dbg >> (5 * off)) & 0x1f;
if (dcu_chain_state == 0x6) {
dcu_wait_frdone = true;
chk_dcu |= BIT(i);
}
}

it would be best to have a constant declared that would be based on ATH9K_NUM_TX_QUEUES and the magical 32bits of space the declarations limit the calculations to.
it seem that the mask of 0x1f suggests that there are 5 bits per queue. So there would be 2 bits left in dma_dbg_4 potentially, but the logic suggests that there are simply 6 groups of 5 bits in each of the registers without there being a split of the value across the 32-bit boundary.

Gregg Wonderly


2023-03-22 21:40:54

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Gregg Wonderly <[email protected]> writes:

> I am receiving a console error message from this driver that appears to be in the following function. In this function, the chk_dbg variable is 32bits and there is logic that seems to attempt to select from 1 of 2 different 32bit values to get a 64bit wide mask value into chk_dbg from dma_dbg_4 or dmc_dbg_5.
>
> The problem is that the (5*i) shift count should be have i adjusted by the 6 limit used to make the check for which dma_dbg_[45] value selected.
>
>
> static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
> {
> u32 dma_dbg_4, dma_dbg_5, dma_dbg_6, chk_dbg;
> u8 dcu_chain_state, dcu_complete_state;
> bool dcu_wait_frdone = false;
> unsigned long chk_dcu = 0;
> unsigned int i = 0;
> dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
> dma_dbg_5 = REG_READ(ah, AR_DMADBG_5);
> dma_dbg_6 = REG_READ(ah, AR_DMADBG_6);
> dcu_complete_state = dma_dbg_6 & 0x3;
> if (dcu_complete_state != 0x1)
> goto exit;
> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
> if (i < 6)
> chk_dbg = dma_dbg_4;
> else
> chk_dbg = dma_dbg_5;
> dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
> if (dcu_chain_state == 0x6) {
> dcu_wait_frdone = true;
> chk_dcu |= BIT(i);
> }
> }
> if ((dcu_complete_state == 0x1) && dcu_wait_frdone) {
> for_each_set_bit(i, &chk_dcu, ATH9K_NUM_TX_QUEUES) {
> if (ath9k_hw_verify_hang(ah, i))
> return true;
> }
> }
> exit:
> return false;
> }
>
> The for loop seems to need to look like the following:
>
> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
> int off=i;
> if (i < 6) {
> chk_dbg = dma_dbg_4;
> } else {
> chk_dbg = dma_dbg_5;
> off = i - 6;
> }
> dcu_chain_state = (chk_dbg >> (5 * off)) & 0x1f;
> if (dcu_chain_state == 0x6) {
> dcu_wait_frdone = true;
> chk_dcu |= BIT(i);
> }
> }
>

Did you test this? Please send a proper patch :)

-Toke

2023-03-30 13:46:29

by Gregg Wonderly

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.

Gregg Wonderly

> On Mar 22, 2023, at 4:33 PM, Toke Høiland-Jørgensen <[email protected]> wrote:
>
> Gregg Wonderly <[email protected]> writes:
>
>> I am receiving a console error message from this driver that appears to be in the following function. In this function, the chk_dbg variable is 32bits and there is logic that seems to attempt to select from 1 of 2 different 32bit values to get a 64bit wide mask value into chk_dbg from dma_dbg_4 or dmc_dbg_5.
>>
>> The problem is that the (5*i) shift count should be have i adjusted by the 6 limit used to make the check for which dma_dbg_[45] value selected.
>>
>>
>> static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
>> {
>> u32 dma_dbg_4, dma_dbg_5, dma_dbg_6, chk_dbg;
>> u8 dcu_chain_state, dcu_complete_state;
>> bool dcu_wait_frdone = false;
>> unsigned long chk_dcu = 0;
>> unsigned int i = 0;
>> dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
>> dma_dbg_5 = REG_READ(ah, AR_DMADBG_5);
>> dma_dbg_6 = REG_READ(ah, AR_DMADBG_6);
>> dcu_complete_state = dma_dbg_6 & 0x3;
>> if (dcu_complete_state != 0x1)
>> goto exit;
>> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
>> if (i < 6)
>> chk_dbg = dma_dbg_4;
>> else
>> chk_dbg = dma_dbg_5;
>> dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
>> if (dcu_chain_state == 0x6) {
>> dcu_wait_frdone = true;
>> chk_dcu |= BIT(i);
>> }
>> }
>> if ((dcu_complete_state == 0x1) && dcu_wait_frdone) {
>> for_each_set_bit(i, &chk_dcu, ATH9K_NUM_TX_QUEUES) {
>> if (ath9k_hw_verify_hang(ah, i))
>> return true;
>> }
>> }
>> exit:
>> return false;
>> }
>>
>> The for loop seems to need to look like the following:
>>
>> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
>> int off=i;
>> if (i < 6) {
>> chk_dbg = dma_dbg_4;
>> } else {
>> chk_dbg = dma_dbg_5;
>> off = i - 6;
>> }
>> dcu_chain_state = (chk_dbg >> (5 * off)) & 0x1f;
>> if (dcu_chain_state == 0x6) {
>> dcu_wait_frdone = true;
>> chk_dcu |= BIT(i);
>> }
>> }
>>
>
> Did you test this? Please send a proper patch :)
>
> -Toke

2023-03-30 16:27:52

by Peter Seiderer

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Hello Gregg, Toke,

On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:

> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.

The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
lines above seems to support your reasoning (for the '6' constant, not
for the packaging into an u64 - bit 30-31 unused):

1073 /*
1074 * MAC HW hang check
1075 * =================
1076 *
1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
1078 *
1079 * The state of each DCU chain (mapped to TX queues) is available from these
1080 * DMA debug registers:
1081 *
1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
1092 *
1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
1094 */

But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
for queue < 6 and AR_DMADBG_5 above):

1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;

Regards,
Peter


>
> Gregg Wonderly
>
> > On Mar 22, 2023, at 4:33 PM, Toke Høiland-Jørgensen <[email protected]> wrote:
> >
> > Gregg Wonderly <[email protected]> writes:
> >
> >> I am receiving a console error message from this driver that appears to be in the following function. In this function, the chk_dbg variable is 32bits and there is logic that seems to attempt to select from 1 of 2 different 32bit values to get a 64bit wide mask value into chk_dbg from dma_dbg_4 or dmc_dbg_5.
> >>
> >> The problem is that the (5*i) shift count should be have i adjusted by the 6 limit used to make the check for which dma_dbg_[45] value selected.
> >>
> >>
> >> static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
> >> {
> >> u32 dma_dbg_4, dma_dbg_5, dma_dbg_6, chk_dbg;
> >> u8 dcu_chain_state, dcu_complete_state;
> >> bool dcu_wait_frdone = false;
> >> unsigned long chk_dcu = 0;
> >> unsigned int i = 0;
> >> dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
> >> dma_dbg_5 = REG_READ(ah, AR_DMADBG_5);
> >> dma_dbg_6 = REG_READ(ah, AR_DMADBG_6);
> >> dcu_complete_state = dma_dbg_6 & 0x3;
> >> if (dcu_complete_state != 0x1)
> >> goto exit;
> >> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
> >> if (i < 6)
> >> chk_dbg = dma_dbg_4;
> >> else
> >> chk_dbg = dma_dbg_5;
> >> dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
> >> if (dcu_chain_state == 0x6) {
> >> dcu_wait_frdone = true;
> >> chk_dcu |= BIT(i);
> >> }
> >> }
> >> if ((dcu_complete_state == 0x1) && dcu_wait_frdone) {
> >> for_each_set_bit(i, &chk_dcu, ATH9K_NUM_TX_QUEUES) {
> >> if (ath9k_hw_verify_hang(ah, i))
> >> return true;
> >> }
> >> }
> >> exit:
> >> return false;
> >> }
> >>
> >> The for loop seems to need to look like the following:
> >>
> >> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
> >> int off=i;
> >> if (i < 6) {
> >> chk_dbg = dma_dbg_4;
> >> } else {
> >> chk_dbg = dma_dbg_5;
> >> off = i - 6;
> >> }
> >> dcu_chain_state = (chk_dbg >> (5 * off)) & 0x1f;
> >> if (dcu_chain_state == 0x6) {
> >> dcu_wait_frdone = true;
> >> chk_dcu |= BIT(i);
> >> }
> >> }
> >>
> >
> > Did you test this? Please send a proper patch :)
> >
> > -Toke
>

2023-03-30 16:57:31

by Gregg Wonderly

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Okay, I had not looked for that detail. Thanks for pointing it out. Clearly my initial thought would work. The ‘6’ should just be cast into a more explanatory local valued name, or a #define of some sort.

I also had not looked to see if the same logic was elsewhere. So that implies there could be a small refactoring to put this logic in on place too.

Gregg Wonderly

> On Mar 30, 2023, at 11:11 AM, Peter Seiderer <[email protected]> wrote:
>
> Hello Gregg, Toke,
>
> On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:
>
>> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.
>
> The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
> lines above seems to support your reasoning (for the '6' constant, not
> for the packaging into an u64 - bit 30-31 unused):
>
> 1073 /*
> 1074 * MAC HW hang check
> 1075 * =================
> 1076 *
> 1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
> 1078 *
> 1079 * The state of each DCU chain (mapped to TX queues) is available from these
> 1080 * DMA debug registers:
> 1081 *
> 1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
> 1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
> 1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
> 1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
> 1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
> 1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
> 1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
> 1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
> 1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
> 1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
> 1092 *
> 1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
> 1094 */
>
> But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
> for queue < 6 and AR_DMADBG_5 above):
>
> 1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>
> Regards,
> Peter
>
>
>>
>> Gregg Wonderly
>>
>>> On Mar 22, 2023, at 4:33 PM, Toke Høiland-Jørgensen <[email protected]> wrote:
>>>
>>> Gregg Wonderly <[email protected]> writes:
>>>
>>>> I am receiving a console error message from this driver that appears to be in the following function. In this function, the chk_dbg variable is 32bits and there is logic that seems to attempt to select from 1 of 2 different 32bit values to get a 64bit wide mask value into chk_dbg from dma_dbg_4 or dmc_dbg_5.
>>>>
>>>> The problem is that the (5*i) shift count should be have i adjusted by the 6 limit used to make the check for which dma_dbg_[45] value selected.
>>>>
>>>>
>>>> static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
>>>> {
>>>> u32 dma_dbg_4, dma_dbg_5, dma_dbg_6, chk_dbg;
>>>> u8 dcu_chain_state, dcu_complete_state;
>>>> bool dcu_wait_frdone = false;
>>>> unsigned long chk_dcu = 0;
>>>> unsigned int i = 0;
>>>> dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
>>>> dma_dbg_5 = REG_READ(ah, AR_DMADBG_5);
>>>> dma_dbg_6 = REG_READ(ah, AR_DMADBG_6);
>>>> dcu_complete_state = dma_dbg_6 & 0x3;
>>>> if (dcu_complete_state != 0x1)
>>>> goto exit;
>>>> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
>>>> if (i < 6)
>>>> chk_dbg = dma_dbg_4;
>>>> else
>>>> chk_dbg = dma_dbg_5;
>>>> dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
>>>> if (dcu_chain_state == 0x6) {
>>>> dcu_wait_frdone = true;
>>>> chk_dcu |= BIT(i);
>>>> }
>>>> }
>>>> if ((dcu_complete_state == 0x1) && dcu_wait_frdone) {
>>>> for_each_set_bit(i, &chk_dcu, ATH9K_NUM_TX_QUEUES) {
>>>> if (ath9k_hw_verify_hang(ah, i))
>>>> return true;
>>>> }
>>>> }
>>>> exit:
>>>> return false;
>>>> }
>>>>
>>>> The for loop seems to need to look like the following:
>>>>
>>>> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
>>>> int off=i;
>>>> if (i < 6) {
>>>> chk_dbg = dma_dbg_4;
>>>> } else {
>>>> chk_dbg = dma_dbg_5;
>>>> off = i - 6;
>>>> }
>>>> dcu_chain_state = (chk_dbg >> (5 * off)) & 0x1f;
>>>> if (dcu_chain_state == 0x6) {
>>>> dcu_wait_frdone = true;
>>>> chk_dcu |= BIT(i);
>>>> }
>>>> }
>>>>
>>>
>>> Did you test this? Please send a proper patch :)
>>>
>>> -Toke
>>
>

2023-04-13 22:18:09

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Peter Seiderer <[email protected]> writes:

> Hello Gregg, Toke,
>
> On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:
>
>> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.
>
> The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
> lines above seems to support your reasoning (for the '6' constant, not
> for the packaging into an u64 - bit 30-31 unused):
>
> 1073 /*
> 1074 * MAC HW hang check
> 1075 * =================
> 1076 *
> 1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
> 1078 *
> 1079 * The state of each DCU chain (mapped to TX queues) is available from these
> 1080 * DMA debug registers:
> 1081 *
> 1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
> 1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
> 1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
> 1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
> 1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
> 1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
> 1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
> 1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
> 1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
> 1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
> 1092 *
> 1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
> 1094 */
>
> But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
> for queue < 6 and AR_DMADBG_5 above):
>
> 1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;

Okay, here is a patch fixing both places; could one of you please test
it?

-Toke

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
index 4f27a9fb1482..2e8570baabf6 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
@@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue)
{
u32 dma_dbg_chain, dma_dbg_complete;
u8 dcu_chain_state, dcu_complete_state;
+ unsigned int dbg_reg, offset;
int i;

- for (i = 0; i < NUM_STATUS_READS; i++) {
- if (queue < 6)
- dma_dbg_chain = REG_READ(ah, AR_DMADBG_4);
- else
- dma_dbg_chain = REG_READ(ah, AR_DMADBG_5);
+ if (queue < 6) {
+ dbg_reg = AR_DMADBG_4;
+ offset = queue;
+ } else {
+ dbg_reg = AR_DMADBG_5;
+ offset = queue - 6;
+ }

+ for (i = 0; i < NUM_STATUS_READS; i++) {
+ dma_dbg_chain = REG_READ(ah, dbg_reg);
dma_dbg_complete = REG_READ(ah, AR_DMADBG_6);

- dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
+ dcu_chain_state = (dma_dbg_chain >> (5 * offset)) & 0x1f;
dcu_complete_state = dma_dbg_complete & 0x3;

if ((dcu_chain_state != 0x6) || (dcu_complete_state != 0x1))
@@ -1139,12 +1144,17 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
goto exit;

for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
- if (i < 6)
+ unsigned int offset;
+
+ if (i < 6) {
chk_dbg = dma_dbg_4;
- else
+ offset = i;
+ } else {
chk_dbg = dma_dbg_5;
+ offset = i - 6;
+ }

- dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
+ dcu_chain_state = (chk_dbg >> (5 * offset)) & 0x1f;
if (dcu_chain_state == 0x6) {
dcu_wait_frdone = true;
chk_dcu |= BIT(i);

2023-04-18 21:24:27

by Peter Seiderer

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Hello Toke,

On Fri, 14 Apr 2023 00:17:04 +0200, Toke Høiland-Jørgensen <[email protected]> wrote:

> Peter Seiderer <[email protected]> writes:
>
> > Hello Gregg, Toke,
> >
> > On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:
> >
> >> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.
> >
> > The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
> > lines above seems to support your reasoning (for the '6' constant, not
> > for the packaging into an u64 - bit 30-31 unused):
> >
> > 1073 /*
> > 1074 * MAC HW hang check
> > 1075 * =================
> > 1076 *
> > 1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
> > 1078 *
> > 1079 * The state of each DCU chain (mapped to TX queues) is available from these
> > 1080 * DMA debug registers:
> > 1081 *
> > 1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
> > 1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
> > 1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
> > 1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
> > 1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
> > 1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
> > 1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
> > 1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
> > 1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
> > 1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
> > 1092 *
> > 1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
> > 1094 */
> >
> > But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
> > for queue < 6 and AR_DMADBG_5 above):
> >
> > 1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>
> Okay, here is a patch fixing both places; could one of you please test
> it?

Sorry for the delayed answer..., did prepare already a similar patch (but
without the optimization of moving out the dbg_reg/offset out of the for-
loop in ath9k_hw_verify_hang) and tested it via some additional debug
output....

In IBSS mode with iperf running in both directions all 1 to 3 hours
ar9003_hw_detect_mac_hang() triggers the additional check/call
to ath9k_hw_verify_hang() but always without real hang outcome...

Some (minor) style questions below...

>
> -Toke
>
> diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
> index 4f27a9fb1482..2e8570baabf6 100644
> --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
> +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
> @@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue)
> {
> u32 dma_dbg_chain, dma_dbg_complete;
> u8 dcu_chain_state, dcu_complete_state;
> + unsigned int dbg_reg, offset;
> int i;
>
> - for (i = 0; i < NUM_STATUS_READS; i++) {
> - if (queue < 6)
> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_4);
> - else
> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_5);
> + if (queue < 6) {
> + dbg_reg = AR_DMADBG_4;
> + offset = queue;

Or calculate the 'real' offset here:

offset = queue * 5;

> + } else {
> + dbg_reg = AR_DMADBG_5;
> + offset = queue - 6;

offset = (queue - 6) * 5;
> + }
>
> + for (i = 0; i < NUM_STATUS_READS; i++) {
> + dma_dbg_chain = REG_READ(ah, dbg_reg);
> dma_dbg_complete = REG_READ(ah, AR_DMADBG_6);
>
> - dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
> + dcu_chain_state = (dma_dbg_chain >> (5 * offset)) & 0x1f;

And a slightly simpler calculation here:

dcu_chain_state = (dma_dbg_chain >> offset) & 0x1f;

Or alternative (without offset var) solution:

dcu_chain_state = (dma_dbg_chain >> (5 * (queue % 6))) & 0x1f;

Do you prefer to convert your suggestion into a complete patch/commit or
should I update mine (incorporating the optimization of moving out the
dbg_reg/offset out of the for-loop) and send to the mailing list?

Regards,
Peter


> dcu_complete_state = dma_dbg_complete & 0x3;
>
> if ((dcu_chain_state != 0x6) || (dcu_complete_state != 0x1))
> @@ -1139,12 +1144,17 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
> goto exit;
>
> for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
> - if (i < 6)
> + unsigned int offset;
> +
> + if (i < 6) {
> chk_dbg = dma_dbg_4;
> - else
> + offset = i;
> + } else {
> chk_dbg = dma_dbg_5;
> + offset = i - 6;
> + }
>
> - dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
> + dcu_chain_state = (chk_dbg >> (5 * offset)) & 0x1f;
> if (dcu_chain_state == 0x6) {
> dcu_wait_frdone = true;
> chk_dcu |= BIT(i);

2023-04-18 23:15:37

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Peter Seiderer <[email protected]> writes:

> Hello Toke,
>
> On Fri, 14 Apr 2023 00:17:04 +0200, Toke Høiland-Jørgensen <[email protected]> wrote:
>
>> Peter Seiderer <[email protected]> writes:
>>
>> > Hello Gregg, Toke,
>> >
>> > On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:
>> >
>> >> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.
>> >
>> > The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
>> > lines above seems to support your reasoning (for the '6' constant, not
>> > for the packaging into an u64 - bit 30-31 unused):
>> >
>> > 1073 /*
>> > 1074 * MAC HW hang check
>> > 1075 * =================
>> > 1076 *
>> > 1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
>> > 1078 *
>> > 1079 * The state of each DCU chain (mapped to TX queues) is available from these
>> > 1080 * DMA debug registers:
>> > 1081 *
>> > 1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
>> > 1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
>> > 1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
>> > 1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
>> > 1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
>> > 1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
>> > 1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
>> > 1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
>> > 1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
>> > 1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
>> > 1092 *
>> > 1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
>> > 1094 */
>> >
>> > But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
>> > for queue < 6 and AR_DMADBG_5 above):
>> >
>> > 1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>>
>> Okay, here is a patch fixing both places; could one of you please test
>> it?
>
> Sorry for the delayed answer..., did prepare already a similar patch (but
> without the optimization of moving out the dbg_reg/offset out of the for-
> loop in ath9k_hw_verify_hang) and tested it via some additional debug
> output....
>
> In IBSS mode with iperf running in both directions all 1 to 3 hours
> ar9003_hw_detect_mac_hang() triggers the additional check/call
> to ath9k_hw_verify_hang() but always without real hang outcome...

Great, thanks!

> Some (minor) style questions below...
>
>>
>> -Toke
>>
>> diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>> index 4f27a9fb1482..2e8570baabf6 100644
>> --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>> +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>> @@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue)
>> {
>> u32 dma_dbg_chain, dma_dbg_complete;
>> u8 dcu_chain_state, dcu_complete_state;
>> + unsigned int dbg_reg, offset;
>> int i;
>>
>> - for (i = 0; i < NUM_STATUS_READS; i++) {
>> - if (queue < 6)
>> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_4);
>> - else
>> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_5);
>> + if (queue < 6) {
>> + dbg_reg = AR_DMADBG_4;
>> + offset = queue;
>
> Or calculate the 'real' offset here:
>
> offset = queue * 5;
>
>> + } else {
>> + dbg_reg = AR_DMADBG_5;
>> + offset = queue - 6;
>
> offset = (queue - 6) * 5;
>> + }
>>
>> + for (i = 0; i < NUM_STATUS_READS; i++) {
>> + dma_dbg_chain = REG_READ(ah, dbg_reg);
>> dma_dbg_complete = REG_READ(ah, AR_DMADBG_6);
>>
>> - dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>> + dcu_chain_state = (dma_dbg_chain >> (5 * offset)) & 0x1f;
>
> And a slightly simpler calculation here:
>
> dcu_chain_state = (dma_dbg_chain >> offset) & 0x1f;

Sure, SGTM.

> Or alternative (without offset var) solution:
>
> dcu_chain_state = (dma_dbg_chain >> (5 * (queue % 6))) & 0x1f;

Was trying to avoid the divide (in %) by defining the offset above
(probably a useless optimisation, but, well :)).

> Do you prefer to convert your suggestion into a complete patch/commit or
> should I update mine (incorporating the optimization of moving out the
> dbg_reg/offset out of the for-loop) and send to the mailing list?

I mostly wrote that because I wasn't sure any of y'all would send a
patch; so sure, please go ahead and submit a proper one :)

-Toke

2023-04-19 00:01:48

by Gregg Wonderly

[permalink] [raw]
Subject: Re: shift exponent 35 is too large @ ath/ath9k/ar9003_hw.c:1147

Thanks for picking this up Peter! I am still a week out, at least, before I could work on this!

Gregg Wonderly!

Sent from my iPhone

> On Apr 18, 2023, at 6:03 PM, Toke Høiland-Jørgensen <[email protected]> wrote:
>
> Peter Seiderer <[email protected]> writes:
>
>> Hello Toke,
>>
>>> On Fri, 14 Apr 2023 00:17:04 +0200, Toke Høiland-Jørgensen <[email protected]> wrote:
>>>
>>> Peter Seiderer <[email protected]> writes:
>>>
>>>> Hello Gregg, Toke,
>>>>
>>>> On Thu, 30 Mar 2023 08:44:25 -0500, Gregg Wonderly <[email protected]> wrote:
>>>>
>>>>> I have not tested this. I am in the middle of testing on this machine of many other things and building a kernel right now is not on my timeline. Note that I have a magic 6 constant in there. I derived this from dividing 32 by the bit mask 0x1f width of 5. But looking further at this, it seems like chk_dbg should actually be a u64, and dma_dbg_4 and dma_dbg_5 placed into that value as a continuous 64bit value. But again, I don’t know if there are 2 bits at the top of dma_dbg_4 and 3 bits at the bottom of dma_dbg_5 that go together. This needs to be fixed by someone with the time and the knowledge of what’s going on in the hardware.
>>>>
>>>> The comment from drivers/net/wireless/ath/ath9k/ar9003_hw.c only some
>>>> lines above seems to support your reasoning (for the '6' constant, not
>>>> for the packaging into an u64 - bit 30-31 unused):
>>>>
>>>> 1073 /*
>>>> 1074 * MAC HW hang check
>>>> 1075 * =================
>>>> 1076 *
>>>> 1077 * Signature: dcu_chain_state is 0x6 and dcu_complete_state is 0x1.
>>>> 1078 *
>>>> 1079 * The state of each DCU chain (mapped to TX queues) is available from these
>>>> 1080 * DMA debug registers:
>>>> 1081 *
>>>> 1082 * Chain 0 state : Bits 4:0 of AR_DMADBG_4
>>>> 1083 * Chain 1 state : Bits 9:5 of AR_DMADBG_4
>>>> 1084 * Chain 2 state : Bits 14:10 of AR_DMADBG_4
>>>> 1085 * Chain 3 state : Bits 19:15 of AR_DMADBG_4
>>>> 1086 * Chain 4 state : Bits 24:20 of AR_DMADBG_4
>>>> 1087 * Chain 5 state : Bits 29:25 of AR_DMADBG_4
>>>> 1088 * Chain 6 state : Bits 4:0 of AR_DMADBG_5
>>>> 1089 * Chain 7 state : Bits 9:5 of AR_DMADBG_5
>>>> 1090 * Chain 8 state : Bits 14:10 of AR_DMADBG_5
>>>> 1091 * Chain 9 state : Bits 19:15 of AR_DMADBG_5
>>>> 1092 *
>>>> 1093 * The DCU chain state "0x6" means "WAIT_FRDONE" - wait for TX frame to be done.
>>>> 1094 */
>>>>
>>>> But with the same/similar bug some lines below (dma_dbg_chain is AR_DMADBG_4
>>>> for queue < 6 and AR_DMADBG_5 above):
>>>>
>>>> 1112 dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>>>
>>> Okay, here is a patch fixing both places; could one of you please test
>>> it?
>>
>> Sorry for the delayed answer..., did prepare already a similar patch (but
>> without the optimization of moving out the dbg_reg/offset out of the for-
>> loop in ath9k_hw_verify_hang) and tested it via some additional debug
>> output....
>>
>> In IBSS mode with iperf running in both directions all 1 to 3 hours
>> ar9003_hw_detect_mac_hang() triggers the additional check/call
>> to ath9k_hw_verify_hang() but always without real hang outcome...
>
> Great, thanks!
>
>> Some (minor) style questions below...
>>
>>>
>>> -Toke
>>>
>>> diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>>> index 4f27a9fb1482..2e8570baabf6 100644
>>> --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>>> +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
>>> @@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue)
>>> {
>>> u32 dma_dbg_chain, dma_dbg_complete;
>>> u8 dcu_chain_state, dcu_complete_state;
>>> + unsigned int dbg_reg, offset;
>>> int i;
>>>
>>> - for (i = 0; i < NUM_STATUS_READS; i++) {
>>> - if (queue < 6)
>>> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_4);
>>> - else
>>> - dma_dbg_chain = REG_READ(ah, AR_DMADBG_5);
>>> + if (queue < 6) {
>>> + dbg_reg = AR_DMADBG_4;
>>> + offset = queue;
>>
>> Or calculate the 'real' offset here:
>>
>> offset = queue * 5;
>>
>>> + } else {
>>> + dbg_reg = AR_DMADBG_5;
>>> + offset = queue - 6;
>>
>> offset = (queue - 6) * 5;
>>> + }
>>>
>>> + for (i = 0; i < NUM_STATUS_READS; i++) {
>>> + dma_dbg_chain = REG_READ(ah, dbg_reg);
>>> dma_dbg_complete = REG_READ(ah, AR_DMADBG_6);
>>>
>>> - dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
>>> + dcu_chain_state = (dma_dbg_chain >> (5 * offset)) & 0x1f;
>>
>> And a slightly simpler calculation here:
>>
>> dcu_chain_state = (dma_dbg_chain >> offset) & 0x1f;
>
> Sure, SGTM.
>
>> Or alternative (without offset var) solution:
>>
>> dcu_chain_state = (dma_dbg_chain >> (5 * (queue % 6))) & 0x1f;
>
> Was trying to avoid the divide (in %) by defining the offset above
> (probably a useless optimisation, but, well :)).
>
>> Do you prefer to convert your suggestion into a complete patch/commit or
>> should I update mine (incorporating the optimization of moving out the
>> dbg_reg/offset out of the for-loop) and send to the mailing list?
>
> I mostly wrote that because I wasn't sure any of y'all would send a
> patch; so sure, please go ahead and submit a proper one :)
>
> -Toke