2012-10-03 19:08:05

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 3.7 1/3] ath9k: fix ASPM initialization on resume

ath_pci_aspm_init is only called on card init, so PCI registers get reset
after a suspend/resume cycle.

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/ath/ath9k/pci.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index c0c5996..ea1d59e 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -333,6 +333,9 @@ static int ath_pci_suspend(struct device *device)
static int ath_pci_resume(struct device *device)
{
struct pci_dev *pdev = to_pci_dev(device);
+ struct ieee80211_hw *hw = pci_get_drvdata(pdev);
+ struct ath_softc *sc = hw->priv;
+ struct ath_common *common = ath9k_hw_common(sc->sc_ah);
u32 val;

/*
@@ -344,6 +347,8 @@ static int ath_pci_resume(struct device *device)
if ((val & 0x0000ff00) != 0)
pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);

+ ath_pci_aspm_init(common);
+
return 0;
}

--
1.7.9.6 (Apple Git-31.1)



2012-10-06 10:46:50

by Sujith Manoharan

[permalink] [raw]
Subject: RE: [PATCH 3.7 2/3] ath9k: improve suspend/resume reliability

On 2012-10-04 4:33 AM, Sujith Manoharan wrote:
> Felix Fietkau wrote:
>> Ensure that drv_start() always returns true, as a failing hw start usually
>> eventually leads to crashes when there's still a station entry present.
>> Call a power-on reset after a resume and after a hw reset failure to bring
>> the hardware back to life again.
>
> In what situations did HW reset (via start() or resume()) fail ?
I don't know what situations caused it, but this happened on a few
ChromeOS devices in the wild, and my patch fixed it.

Well, the cause for the real issue, the HW reset failure is still unknown.
I think we might be papering over some other bug here.

Sujith

2012-10-03 19:08:04

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 3.7 3/3] ath9k: use ieee80211_free_txskb

Using ieee80211_free_txskb for tx frames is required, since mac80211 clones
skbs for which socket tx status is requested.

Signed-off-by: Felix Fietkau <[email protected]>
Cc: [email protected]
---
drivers/net/wireless/ath/ath9k/beacon.c | 2 +-
drivers/net/wireless/ath/ath9k/main.c | 2 +-
drivers/net/wireless/ath/ath9k/xmit.c | 53 +++++++++++++++++--------------
3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
index 76f07d8..1b48414 100644
--- a/drivers/net/wireless/ath/ath9k/beacon.c
+++ b/drivers/net/wireless/ath/ath9k/beacon.c
@@ -120,7 +120,7 @@ static void ath9k_tx_cabq(struct ieee80211_hw *hw, struct sk_buff *skb)

if (ath_tx_start(hw, skb, &txctl) != 0) {
ath_dbg(common, XMIT, "CABQ TX failed\n");
- dev_kfree_skb_any(skb);
+ ieee80211_free_txskb(hw, skb);
}
}

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index e2fe713..dd45edf 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -765,7 +765,7 @@ static void ath9k_tx(struct ieee80211_hw *hw,

return;
exit:
- dev_kfree_skb_any(skb);
+ ieee80211_free_txskb(hw, skb);
}

static void ath9k_stop(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 36618e3..378bd70 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -66,8 +66,7 @@ static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid,
static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
struct ath_txq *txq,
struct ath_atx_tid *tid,
- struct sk_buff *skb,
- bool dequeue);
+ struct sk_buff *skb);

enum {
MCS_HT20,
@@ -176,7 +175,15 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
fi = get_frame_info(skb);
bf = fi->bf;

- if (bf && fi->retries) {
+ if (!bf) {
+ bf = ath_tx_setup_buffer(sc, txq, tid, skb);
+ if (!bf) {
+ ieee80211_free_txskb(sc->hw, skb);
+ continue;
+ }
+ }
+
+ if (fi->retries) {
list_add_tail(&bf->list, &bf_head);
ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
@@ -785,10 +792,13 @@ static enum ATH_AGGR_STATUS ath_tx_form_aggr(struct ath_softc *sc,
fi = get_frame_info(skb);
bf = fi->bf;
if (!fi->bf)
- bf = ath_tx_setup_buffer(sc, txq, tid, skb, true);
+ bf = ath_tx_setup_buffer(sc, txq, tid, skb);

- if (!bf)
+ if (!bf) {
+ __skb_unlink(skb, &tid->buf_q);
+ ieee80211_free_txskb(sc->hw, skb);
continue;
+ }

bf->bf_state.bf_type = BUF_AMPDU | BUF_AGGR;
seqno = bf->bf_state.seqno;
@@ -1731,9 +1741,11 @@ static void ath_tx_send_ampdu(struct ath_softc *sc, struct ath_atx_tid *tid,
return;
}

- bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb, false);
- if (!bf)
+ bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
+ if (!bf) {
+ ieee80211_free_txskb(sc->hw, skb);
return;
+ }

bf->bf_state.bf_type = BUF_AMPDU;
INIT_LIST_HEAD(&bf_head);
@@ -1757,11 +1769,6 @@ static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf;

bf = fi->bf;
- if (!bf)
- bf = ath_tx_setup_buffer(sc, txq, tid, skb, false);
-
- if (!bf)
- return;

INIT_LIST_HEAD(&bf_head);
list_add_tail(&bf->list, &bf_head);
@@ -1839,8 +1846,7 @@ u8 ath_txchainmask_reduction(struct ath_softc *sc, u8 chainmask, u32 rate)
static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
struct ath_txq *txq,
struct ath_atx_tid *tid,
- struct sk_buff *skb,
- bool dequeue)
+ struct sk_buff *skb)
{
struct ath_common *common = ath9k_hw_common(sc->sc_ah);
struct ath_frame_info *fi = get_frame_info(skb);
@@ -1852,7 +1858,7 @@ static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
bf = ath_tx_get_buffer(sc);
if (!bf) {
ath_dbg(common, XMIT, "TX buffers are full\n");
- goto error;
+ return NULL;
}

ATH_TXBUF_RESET(bf);
@@ -1881,18 +1887,12 @@ static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
ath_err(ath9k_hw_common(sc->sc_ah),
"dma_mapping_error() on TX\n");
ath_tx_return_buffer(sc, bf);
- goto error;
+ return NULL;
}

fi->bf = bf;

return bf;
-
-error:
- if (dequeue)
- __skb_unlink(skb, &tid->buf_q);
- dev_kfree_skb_any(skb);
- return NULL;
}

/* FIXME: tx power */
@@ -1921,9 +1921,14 @@ static void ath_tx_start_dma(struct ath_softc *sc, struct sk_buff *skb,
*/
ath_tx_send_ampdu(sc, tid, skb, txctl);
} else {
- bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb, false);
- if (!bf)
+ bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
+ if (!bf) {
+ if (txctl->paprd)
+ dev_kfree_skb_any(skb);
+ else
+ ieee80211_free_txskb(sc->hw, skb);
return;
+ }

bf->bf_state.bfs_paprd = txctl->paprd;

--
1.7.9.6 (Apple Git-31.1)


2012-10-06 10:59:45

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 3.7 2/3] ath9k: improve suspend/resume reliability

On 2012-10-06 12:46 PM, Manoharan, Sujith wrote:
> On 2012-10-04 4:33 AM, Sujith Manoharan wrote:
>> Felix Fietkau wrote:
>>> Ensure that drv_start() always returns true, as a failing hw start usually
>>> eventually leads to crashes when there's still a station entry present.
>>> Call a power-on reset after a resume and after a hw reset failure to bring
>>> the hardware back to life again.
>>
>> In what situations did HW reset (via start() or resume()) fail ?
>> I don't know what situations caused it, but this happened on a few
>> ChromeOS devices in the wild, and my patch fixed it.
>
> Well, the cause for the real issue, the HW reset failure is still unknown.
> I think we might be papering over some other bug here.
After a suspend/resume cycle, the card definitely needs at least one
power-on reset (since we can assume that the card was power cycled), as
is done on driver initialization as well. My patch ensures that this
happens by setting a flag on resume.

If for some reason the drv_start() reset still fails (and I'm not aware
of any such instances), it's still much better to return 0. The reset
failure will still appear in log messages.

If we return an error code in drv_start, this tends to create a mess of
either WARN_ON/BUG_ON calls triggered at a later point in time (if a
station was connected), or just a complete loss of wifi functionality.

- Felix

2012-10-04 02:33:28

by Sujith Manoharan

[permalink] [raw]
Subject: [PATCH 3.7 2/3] ath9k: improve suspend/resume reliability

Felix Fietkau wrote:
> Ensure that drv_start() always returns true, as a failing hw start usually
> eventually leads to crashes when there's still a station entry present.
> Call a power-on reset after a resume and after a hw reset failure to bring
> the hardware back to life again.

In what situations did HW reset (via start() or resume()) fail ?

Sujith

2012-10-03 19:08:14

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 3.7 2/3] ath9k: improve suspend/resume reliability

Ensure that drv_start() always returns true, as a failing hw start usually
eventually leads to crashes when there's still a station entry present.
Call a power-on reset after a resume and after a hw reset failure to bring
the hardware back to life again.

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/ath/ath9k/hw.c | 5 +++++
drivers/net/wireless/ath/ath9k/hw.h | 1 +
drivers/net/wireless/ath/ath9k/main.c | 13 ++++---------
drivers/net/wireless/ath/ath9k/pci.c | 4 +++-
4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index f9a6ec5..8e1559a 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -1450,9 +1450,14 @@ static bool ath9k_hw_set_reset_reg(struct ath_hw *ah, u32 type)
REG_WRITE(ah, AR_RTC_FORCE_WAKE,
AR_RTC_FORCE_WAKE_EN | AR_RTC_FORCE_WAKE_ON_INT);

+ if (!ah->reset_power_on)
+ type = ATH9K_RESET_POWER_ON;
+
switch (type) {
case ATH9K_RESET_POWER_ON:
ret = ath9k_hw_set_reset_power_on(ah);
+ if (!ret)
+ ah->reset_power_on = true;
break;
case ATH9K_RESET_WARM:
case ATH9K_RESET_COLD:
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index 566a4ce..dbc1b7a 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -741,6 +741,7 @@ struct ath_hw {
u32 rfkill_polarity;
u32 ah_flags;

+ bool reset_power_on;
bool htc_reset_init;

enum nl80211_iftype opmode;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 31ab82e..e2fe713 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -639,8 +639,7 @@ static int ath9k_start(struct ieee80211_hw *hw)
ath_err(common,
"Unable to reset hardware; reset status %d (freq %u MHz)\n",
r, curchan->center_freq);
- spin_unlock_bh(&sc->sc_pcu_lock);
- goto mutex_unlock;
+ ah->reset_power_on = false;
}

/* Setup our intr mask. */
@@ -665,11 +664,8 @@ static int ath9k_start(struct ieee80211_hw *hw)
clear_bit(SC_OP_INVALID, &sc->sc_flags);
sc->sc_ah->is_monitoring = false;

- if (!ath_complete_reset(sc, false)) {
- r = -EIO;
- spin_unlock_bh(&sc->sc_pcu_lock);
- goto mutex_unlock;
- }
+ if (!ath_complete_reset(sc, false))
+ ah->reset_power_on = false;

if (ah->led_pin >= 0) {
ath9k_hw_cfg_output(ah, ah->led_pin,
@@ -688,12 +684,11 @@ static int ath9k_start(struct ieee80211_hw *hw)
if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en)
common->bus_ops->extn_synch_en(common);

-mutex_unlock:
mutex_unlock(&sc->mutex);

ath9k_ps_restore(sc);

- return r;
+ return 0;
}

static void ath9k_tx(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index ea1d59e..9fa8c51 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -335,7 +335,8 @@ static int ath_pci_resume(struct device *device)
struct pci_dev *pdev = to_pci_dev(device);
struct ieee80211_hw *hw = pci_get_drvdata(pdev);
struct ath_softc *sc = hw->priv;
- struct ath_common *common = ath9k_hw_common(sc->sc_ah);
+ struct ath_hw *ah = sc->sc_ah;
+ struct ath_common *common = ath9k_hw_common(ah);
u32 val;

/*
@@ -348,6 +349,7 @@ static int ath_pci_resume(struct device *device)
pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);

ath_pci_aspm_init(common);
+ ah->reset_power_on = false;

return 0;
}
--
1.7.9.6 (Apple Git-31.1)


2012-10-04 09:33:58

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 3.7 2/3] ath9k: improve suspend/resume reliability

On 2012-10-04 4:33 AM, Sujith Manoharan wrote:
> Felix Fietkau wrote:
>> Ensure that drv_start() always returns true, as a failing hw start usually
>> eventually leads to crashes when there's still a station entry present.
>> Call a power-on reset after a resume and after a hw reset failure to bring
>> the hardware back to life again.
>
> In what situations did HW reset (via start() or resume()) fail ?
I don't know what situations caused it, but this happened on a few
ChromeOS devices in the wild, and my patch fixed it.

- Felix