2012-02-01 16:05:19

by Rajkumar Manoharan

[permalink] [raw]
Subject: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

In the following scenario, where the distance b/w STA and AP is ~10m
or in a shield environment by placing an attenuator with reduced AP
txpower, the station started reporting with beacon loss and got
disconnected whenever the chariot endpoint was initiated with BiDi
traffic. In such state, two different stuck cases were observed.

* rx clear is stuck at low for more than 100ms
* dcu chain and complete state is stuck.

This patch detects the stuck state if the beacons are not received for
more than 300ms. In the above matching conditions, trigger a chip
reset to recover. This issue was originally reported in 3.0 kernel with
AR9382 chip by having two stations associated with two different APs in
the same channel and was attenuated/controlled by Azimuth ADEPT-n box.

Cc: Paul Stewart <[email protected]>
Reported-by: Gary Morain <[email protected]>
Signed-off-by: Rajkumar Manoharan <[email protected]>
---
drivers/net/wireless/ath/ath.h | 1 +
drivers/net/wireless/ath/ath9k/ath9k.h | 4 +
drivers/net/wireless/ath/ath9k/init.c | 1 +
drivers/net/wireless/ath/ath9k/main.c | 142 ++++++++++++++++++++++++++++++++
drivers/net/wireless/ath/ath9k/recv.c | 4 +
drivers/net/wireless/ath/hw.c | 5 +
6 files changed, 157 insertions(+), 0 deletions(-)

diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h
index efc0111..f11f397 100644
--- a/drivers/net/wireless/ath/ath.h
+++ b/drivers/net/wireless/ath/ath.h
@@ -150,6 +150,7 @@ struct ath_common {
spinlock_t cc_lock;
struct ath_cycle_counters cc_ani;
struct ath_cycle_counters cc_survey;
+ struct ath_cycle_counters cc_rxpoll;

struct ath_regulatory regulatory;
struct ath_regulatory reg_world_copy;
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 171ccf7..748a277 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -306,6 +306,7 @@ struct ath_rx_edma {
struct ath_rx {
u8 defant;
u8 rxotherant;
+ bool stop_rx_poll;
u32 *rxlink;
unsigned int rxfilter;
spinlock_t rxbuflock;
@@ -431,6 +432,8 @@ void ath9k_set_beaconing_status(struct ath_softc *sc, bool status);
void ath_reset_work(struct work_struct *work);
void ath_hw_check(struct work_struct *work);
void ath_hw_pll_work(struct work_struct *work);
+void ath_rx_poll_work(unsigned long data);
+void ath_start_rx_poll(struct ath_softc *sc, const u32 msec);
void ath_paprd_calibrate(struct work_struct *work);
void ath_ani_calibrate(unsigned long data);
void ath_start_ani(struct ath_common *common);
@@ -650,6 +653,7 @@ struct ath_softc {
struct ath_beacon_config cur_beacon_conf;
struct delayed_work tx_complete_work;
struct delayed_work hw_pll_work;
+ struct timer_list rx_poll_timer;
struct ath_btcoex btcoex;
struct ath_mci_coex mci_coex;

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index abf9435..df49b38 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -844,6 +844,7 @@ int ath9k_init_device(u16 devid, struct ath_softc *sc,
INIT_WORK(&sc->hw_check_work, ath_hw_check);
INIT_WORK(&sc->paprd_work, ath_paprd_calibrate);
INIT_DELAYED_WORK(&sc->hw_pll_work, ath_hw_pll_work);
+ setup_timer(&sc->rx_poll_timer, ath_rx_poll_work, (unsigned long)sc);
sc->last_rssi = ATH_RSSI_DUMMY_MARKER;

ath_init_leds(sc);
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index ec82e92..b4322cb 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -241,6 +241,7 @@ static bool ath_prepare_reset(struct ath_softc *sc, bool retry_tx, bool flush)

sc->hw_busy_count = 0;
del_timer_sync(&common->ani.timer);
+ del_timer_sync(&sc->rx_poll_timer);

ath9k_debug_samp_bb_mac(sc);
ath9k_hw_disable_interrupts(ah);
@@ -282,6 +283,7 @@ static bool ath_complete_reset(struct ath_softc *sc, bool start)

ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, 0);
ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work, HZ/2);
+ ath_start_rx_poll(sc, 100);
if (!common->disable_ani)
ath_start_ani(common);
}
@@ -1179,6 +1181,7 @@ static void ath9k_stop(struct ieee80211_hw *hw)
mutex_lock(&sc->mutex);

ath_cancel_work(sc);
+ del_timer_sync(&sc->rx_poll_timer);

if (sc->sc_flags & SC_OP_INVALID) {
ath_dbg(common, ANY, "Device not present\n");
@@ -1418,6 +1421,141 @@ static void ath9k_do_vif_add_setup(struct ieee80211_hw *hw,
}
}

+static bool ath9k_check_dcu_chain_state(u32 dma_dbg, int max_limit,
+ int *hang_state, int *hang_pos)
+{
+ static u32 hang_sign[] = {5, 6, 9};
+ u32 chain_state, dcs_pos, i;
+
+ for (dcs_pos = 0; dcs_pos < max_limit; dcs_pos++) {
+ chain_state = (dma_dbg >> (5 * dcs_pos)) & 0x1f;
+ for (i = 0; i < 3; i++) {
+ if (chain_state == hang_sign[i]) {
+ *hang_state = chain_state;
+ *hang_pos = dcs_pos;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+#define DCU_COMPLETE_STATE 1
+#define NUM_STATUS_READS 50
+static bool ath9k_detect_mac_hang(struct ath_hw *ah)
+{
+ u32 chain_state, comp_state, dcs_reg = AR_DMADBG_4;
+ u32 i, hang_pos, hang_state;
+
+ comp_state = REG_READ(ah, AR_DMADBG_6);
+
+ if ((comp_state & 0x3) != DCU_COMPLETE_STATE) {
+ ath_dbg(ath9k_hw_common(ah), RESET,
+ "MAC Hang signature not found at DCU complete\n");
+ return false;
+ }
+
+ chain_state = REG_READ(ah, dcs_reg);
+ if (ath9k_check_dcu_chain_state(chain_state, 6, &hang_state, &hang_pos))
+ goto hang_check_iter;
+
+ dcs_reg = AR_DMADBG_5;
+ chain_state = REG_READ(ah, dcs_reg);
+ if (ath9k_check_dcu_chain_state(chain_state, 4, &hang_state, &hang_pos))
+ goto hang_check_iter;
+
+ ath_dbg(ath9k_hw_common(ah), RESET,
+ "MAC Hang signature 1 not found\n");
+ return false;
+
+hang_check_iter:
+ ath_dbg(ath9k_hw_common(ah), RESET,
+ "DCU registers: chain %08x complete %08x Hang: state %d pos %d\n",
+ chain_state, comp_state, hang_state, hang_pos);
+
+ for (i = 0; i < NUM_STATUS_READS; i++) {
+ chain_state = REG_READ(ah, dcs_reg);
+ chain_state = (chain_state >> (5 * hang_pos)) & 0x1f;
+ comp_state = REG_READ(ah, AR_DMADBG_6);
+
+ if (((comp_state & 0x3) != DCU_COMPLETE_STATE) ||
+ (chain_state != hang_state))
+ return false;
+ }
+
+ ath_dbg(ath9k_hw_common(ah), RESET, "MAC Hang signature 1 found\n");
+
+ return true;
+}
+
+void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
+{
+ if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
+ return;
+
+ if (!(sc->sc_flags & SC_OP_PRIM_STA_VIF))
+ return;
+
+ mod_timer(&sc->rx_poll_timer, jiffies + msecs_to_jiffies(msec));
+}
+
+void ath_rx_poll_work(unsigned long data)
+{
+ struct ath_softc *sc = (struct ath_softc *)data;
+ struct ath_hw *ah = sc->sc_ah;
+ struct ath_common *common = ath9k_hw_common(ah);
+ static u32 iteration, match_count;
+ static u64 last_run;
+ unsigned long flags;
+ u32 rx_clear, rx, tx, delay = 10;
+
+ spin_lock_bh(&sc->rx.rxbuflock);
+ if (jiffies_to_msecs(jiffies - last_run) > 30)
+ iteration = match_count = 0;
+ else {
+ if (sc->rx.stop_rx_poll && iteration) {
+ spin_unlock_bh(&sc->rx.rxbuflock);
+ iteration = match_count = 0;
+ return;
+ }
+ iteration += 1;
+ }
+ sc->rx.stop_rx_poll = false;
+ spin_unlock_bh(&sc->rx.rxbuflock);
+ sc->ps_flags |= PS_WAIT_FOR_BEACON;
+ ath9k_ps_wakeup(sc);
+
+ spin_lock_irqsave(&common->cc_lock, flags);
+ ath_hw_cycle_counters_update(common);
+
+ rx_clear = common->cc_rxpoll.rx_busy * 100 / common->cc_rxpoll.cycles;
+ rx = common->cc_rxpoll.rx_frame * 100 / common->cc_rxpoll.cycles;
+ tx = common->cc_rxpoll.tx_frame * 100 / common->cc_rxpoll.cycles;
+ memset(&common->cc_rxpoll, 0, sizeof(common->cc_rxpoll));
+ spin_unlock_irqrestore(&common->cc_lock, flags);
+
+ last_run = jiffies;
+ if (rx_clear > 98) {
+ ath_dbg(common, RESET,
+ "rx clear %d match count %d iteration %d\n",
+ rx_clear, match_count, iteration);
+ if (match_count++ > 9)
+ goto queue_reset_work;
+ } else if (ath9k_detect_mac_hang(ah))
+ goto queue_reset_work;
+ else if (iteration >= 15) {
+ iteration = match_count = 0;
+ delay = 200;
+ }
+ ath9k_ps_restore(sc);
+ ath_start_rx_poll(sc, delay);
+ return;
+
+queue_reset_work:
+ ath9k_ps_restore(sc);
+ ieee80211_queue_work(sc->hw, &sc->hw_reset_work);
+ iteration = match_count = 0;
+}

static int ath9k_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
@@ -1948,6 +2086,8 @@ static void ath9k_bss_iter(void *data, u8 *mac, struct ieee80211_vif *vif)
if (!common->disable_ani) {
sc->sc_flags |= SC_OP_ANI_RUN;
ath_start_ani(common);
+ sc->rx.stop_rx_poll = false;
+ ath_start_rx_poll(sc, 300);
}

}
@@ -1984,6 +2124,7 @@ static void ath9k_config_bss(struct ath_softc *sc, struct ieee80211_vif *vif)
/* Stop ANI */
sc->sc_flags &= ~SC_OP_ANI_RUN;
del_timer_sync(&common->ani.timer);
+ del_timer_sync(&sc->rx_poll_timer);
memset(&sc->caldata, 0, sizeof(sc->caldata));
}
}
@@ -2027,6 +2168,7 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
} else {
sc->sc_flags &= ~SC_OP_ANI_RUN;
del_timer_sync(&common->ani.timer);
+ del_timer_sync(&sc->rx_poll_timer);
}
}

diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 0e666fb..989b1ee 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -1837,6 +1837,10 @@ int ath_rx_tasklet(struct ath_softc *sc, int flush, bool hp)
if (sc->sc_flags & SC_OP_RXFLUSH)
goto requeue_drop_frag;

+ if (rs.is_mybeacon) {
+ sc->rx.stop_rx_poll = true;
+ ath_start_rx_poll(sc, 300);
+ }
rxs->mactime = (tsf & ~0xffffffffULL) | rs.rs_tstamp;
if (rs.rs_tstamp > tsf_lower &&
unlikely(rs.rs_tstamp - tsf_lower > 0x10000000))
diff --git a/drivers/net/wireless/ath/hw.c b/drivers/net/wireless/ath/hw.c
index 19befb3..f1821ea 100644
--- a/drivers/net/wireless/ath/hw.c
+++ b/drivers/net/wireless/ath/hw.c
@@ -166,6 +166,11 @@ void ath_hw_cycle_counters_update(struct ath_common *common)
common->cc_survey.rx_busy += busy;
common->cc_survey.rx_frame += rx;
common->cc_survey.tx_frame += tx;
+
+ common->cc_rxpoll.cycles += cycles;
+ common->cc_rxpoll.rx_busy += busy;
+ common->cc_rxpoll.rx_frame += rx;
+ common->cc_rxpoll.tx_frame += tx;
}
EXPORT_SYMBOL(ath_hw_cycle_counters_update);

--
1.7.9



2012-02-02 03:49:08

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On Wed, Feb 01, 2012 at 11:35:13AM -0800, Ben Greear wrote:
> On 02/01/2012 08:05 AM, Rajkumar Manoharan wrote:
>
> Some more comments below.....
>
>
> >+static bool ath9k_check_dcu_chain_state(u32 dma_dbg, int max_limit,
> >+ int *hang_state, int *hang_pos)
> >+{
> >+ static u32 hang_sign[] = {5, 6, 9};
> >+ u32 chain_state, dcs_pos, i;
> >+
> >+ for (dcs_pos = 0; dcs_pos< max_limit; dcs_pos++) {
> >+ chain_state = (dma_dbg>> (5 * dcs_pos))& 0x1f;
> >+ for (i = 0; i< 3; i++) {
> >+ if (chain_state == hang_sign[i]) {
> >+ *hang_state = chain_state;
> >+ *hang_pos = dcs_pos;
> >+ return true;
> >+ }
> >+ }
> >+ }
> >+ return false;
> >+}
>
> Perhaps you could add some comments to the code above to describe
> what the '5, 6, 9' and other constants mean?
>
sure. these are the dcu_chain_state values. will update the variable name.
> >+#define DCU_COMPLETE_STATE 1
> >+#define NUM_STATUS_READS 50
> >+static bool ath9k_detect_mac_hang(struct ath_hw *ah)
> >+{
> >+ u32 chain_state, comp_state, dcs_reg = AR_DMADBG_4;
> >+ u32 i, hang_pos, hang_state;
> >+
> >+ comp_state = REG_READ(ah, AR_DMADBG_6);
> >+
> >+ if ((comp_state& 0x3) != DCU_COMPLETE_STATE) {
> >+ ath_dbg(ath9k_hw_common(ah), RESET,
> >+ "MAC Hang signature not found at DCU complete\n");
> >+ return false;
> >+ }
>
> Same with the 0x3 (maybe #define what those bits mean and or them together instead
> of using the 0x3?)
>
ok.
> >+
> >+ chain_state = REG_READ(ah, dcs_reg);
> >+ if (ath9k_check_dcu_chain_state(chain_state, 6,&hang_state,&hang_pos))
> >+ goto hang_check_iter;
>
> And why did you choose a '6' here?
>
number of dcu chain states consisted in that dma_dbg register. I'll update in the
function prototype.
> >+
> >+ dcs_reg = AR_DMADBG_5;
> >+ chain_state = REG_READ(ah, dcs_reg);
> >+ if (ath9k_check_dcu_chain_state(chain_state, 4,&hang_state,&hang_pos))
> >+ goto hang_check_iter;
>
> And the '4'?
>
same as above.
> >+void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> >+{
> >+ if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> >+ return;
> >+
> >+ if (!(sc->sc_flags& SC_OP_PRIM_STA_VIF))
> >+ return;
> >+
> >+ mod_timer(&sc->rx_poll_timer, jiffies + msecs_to_jiffies(msec));
> >+}
>
> Since the rx-poll timer isn't started when SC_OP_PRIM_STA_VIF is
> not set, should you always call the ath_start_rx_poll method
> even if ani is disabled in the ath9k_bss_iter method since
> the PRIM_STA_VIF flag is set earlier in that code)?
>
Goog catch. it should be out of ani check.
> > static int ath9k_add_interface(struct ieee80211_hw *hw,
> > struct ieee80211_vif *vif)
> >@@ -1948,6 +2086,8 @@ static void ath9k_bss_iter(void *data, u8 *mac, struct ieee80211_vif *vif)
> > if (!common->disable_ani) {
> > sc->sc_flags |= SC_OP_ANI_RUN;
> > ath_start_ani(common);
> >+ sc->rx.stop_rx_poll = false;
> >+ ath_start_rx_poll(sc, 300);

Thanks for your comments.

--
Rajkumar

2012-02-06 07:09:55

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

On Sat, Feb 04, 2012 at 02:28:30AM -0800, Adrian Chadd wrote:
> My point is that "use self corr low" isn't exactly and only "weak
> signal detection". My understanding is there's a whole heap of various
> PHY twiddles that set thresholds in the OFDM/CCK decoder.
> Disabling weak detection involves setting those thresholds high so
> they don't trigger on weak signals.
>
> I'd like to suggest we hold off on committing anything that really
> changes the ANI behaviour without figuring out exactly what's going on
> underneath the hood. I think it's worthwhile filing a bugzilla report
> and stuffing that patch against the bug though, so it's not "lost".
>
> I can get some further information next week by paying the
> baseband/radio guys a visit. I'll see if we can figure out what's
> going on and why your ANI changes improve in that specific
> environment.
>
> (As a side note - it'd be nice to eliminate more 'stuck beacon' issues
> that relate to the radio side of things.)
>
Adrian,

To be very precise, there are two types of weak signal detection logic in our
baseband -

1) normal weak signal detection logic - involves thresholds m1_thresh,
m2_thresh, m2_Count_thr
2) low weak signal detection logic - involves thresholds m1_thresh_low,
m2_thresh_low, m2_count_thr_low

Type 2 is intended to handle low weak signals and internally baseband logic does
averaging over larger time windows compared to type 1. In current ANI code,
both types 1) and 2) are clubbed together and called as "weak signal detection".

What we've found in extensive testing is that disabling type 1 and type 2 form of
weak signal detection logic together causes "beacon miss". Disabling both types
1 and type 2 together is definitely harmful to our receiver as it cannot receive
even medium strength signals. Disabling type 2 alone is completely safe which
is what we're doing in this change.

For disabling type 2 logic alone there is a register field named
"use_self_corr_low". So instead of maxing out threshold values of 3-4 registers
we simply set use_self_corr_low=0 saving register writes. Hepe this could
clarify your question.

--
Rajkumar

2012-02-03 06:04:10

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On Thu, Feb 02, 2012 at 01:58:21PM -0800, Adrian Chadd wrote:
> On 1 February 2012 20:09, Sujith Manoharan <[email protected]>wrote:
>
>
> > > > I have not reviewed the patch, but to do such workarounds for an entire
> > > > family of chips is overkill. Has it been confirmed that the other
> > > > chips falling under this SREV check require this snafu ?
> > > >
> > > unfortunately this snafu is applicable for all ar9380 MAC based chips ;)
> > > and confirmed the same.
> >
> > Ok. But this stuff really needs to be verified for other chips.
> > These 'poll' workarounds are piling up.
> >
>
> Hi,
>
> Is there a bugzilla.kernel.org bug for this particular behaviour? And if
> you can dig up the internal bug number, please post it privately.
>
No. This issue was originally reported by Google in their setup.

--
Rajkumar

2012-02-01 19:03:28

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On 02/01/2012 10:50 AM, Rajkumar Manoharan wrote:
> On Wed, Feb 01, 2012 at 09:31:50AM -0800, Ben Greear wrote:
>> On 02/01/2012 08:05 AM, Rajkumar Manoharan wrote:
>>> In the following scenario, where the distance b/w STA and AP is ~10m
>>> or in a shield environment by placing an attenuator with reduced AP
>>> txpower, the station started reporting with beacon loss and got
>>> disconnected whenever the chariot endpoint was initiated with BiDi
>>> traffic. In such state, two different stuck cases were observed.
>>>
>>> * rx clear is stuck at low for more than 100ms
>>> * dcu chain and complete state is stuck.
>>>
>>> This patch detects the stuck state if the beacons are not received for
>>> more than 300ms. In the above matching conditions, trigger a chip
>>> reset to recover. This issue was originally reported in 3.0 kernel with
>>> AR9382 chip by having two stations associated with two different APs in
>>> the same channel and was attenuated/controlled by Azimuth ADEPT-n box.
>>
>> Can't the AP be configured for larger beacon intervals? Maybe have it
>> be some multiple of that instead of a fixed 300ms?
>>
> Yes. It can. But the detection log will look for specific signature and
> meanwhile if the beacons are received, the inprogress logic will be aborted.
> I believe that 300ms is not too short or too long to trigger the detection.
> Isn't it?

If the beacon interval is 500ms, then you could easily not get a beacon
during your 300ms interval. If the beacon logic is just a backup, and it doesn't
matter if you don't see the beacon, then probably there is no problem.
I just wanted to make sure you had thought about that issue and that there
would not be spurious fixup logic called if the beacon timer is large.

Thanks,
Ben

>
> -Rajkumar


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com


2012-02-02 04:46:34

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On Thu, Feb 02, 2012 at 09:39:36AM +0530, Sujith Manoharan wrote:
> Rajkumar Manoharan wrote:
> > On Thu, Feb 02, 2012 at 08:38:32AM +0530, Sujith Manoharan wrote:
> > > Rajkumar Manoharan wrote:
> > > > +void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> > > > +{
> > > > + if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> > > > + return;
> > >
> > > I have not reviewed the patch, but to do such workarounds for an entire
> > > family of chips is overkill. Has it been confirmed that the other
> > > chips falling under this SREV check require this snafu ?
> > >
> > unfortunately this snafu is applicable for all ar9380 MAC based chips ;)
> > and confirmed the same.
>
> Ok. But this stuff really needs to be verified for other chips.
> These 'poll' workarounds are piling up.
>
Agree. But sometimes these 'poll' WARs are being inevitable. As this patch was
verified in ar938x chips, i update the SREV to check only for 938x.

--
Rajkumar

2012-02-02 03:56:41

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On Thu, Feb 02, 2012 at 08:38:32AM +0530, Sujith Manoharan wrote:
> Rajkumar Manoharan wrote:
> > +void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> > +{
> > + if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> > + return;
>
> I have not reviewed the patch, but to do such workarounds for an entire
> family of chips is overkill. Has it been confirmed that the other
> chips falling under this SREV check require this snafu ?
>
unfortunately this snafu is applicable for all ar9380 MAC based chips ;)
and confirmed the same.

--
Rajkumar

2012-02-04 10:28:31

by Adrian Chadd

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

My point is that "use self corr low" isn't exactly and only "weak
signal detection". My understanding is there's a whole heap of various
PHY twiddles that set thresholds in the OFDM/CCK decoder.
Disabling weak detection involves setting those thresholds high so
they don't trigger on weak signals.

I'd like to suggest we hold off on committing anything that really
changes the ANI behaviour without figuring out exactly what's going on
underneath the hood. I think it's worthwhile filing a bugzilla report
and stuffing that patch against the bug though, so it's not "lost".

I can get some further information next week by paying the
baseband/radio guys a visit. I'll see if we can figure out what's
going on and why your ANI changes improve in that specific
environment.

(As a side note - it'd be nice to eliminate more 'stuck beacon' issues
that relate to the radio side of things.)


Adrian

2012-02-02 03:08:46

by Sujith Manoharan

[permalink] [raw]
Subject: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

Rajkumar Manoharan wrote:
> +void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> +{
> + if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> + return;

I have not reviewed the patch, but to do such workarounds for an entire
family of chips is overkill. Has it been confirmed that the other
chips falling under this SREV check require this snafu ?

Sujith

2012-02-01 19:35:21

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On 02/01/2012 08:05 AM, Rajkumar Manoharan wrote:
> In the following scenario, where the distance b/w STA and AP is ~10m
> or in a shield environment by placing an attenuator with reduced AP
> txpower, the station started reporting with beacon loss and got
> disconnected whenever the chariot endpoint was initiated with BiDi
> traffic. In such state, two different stuck cases were observed.
>
> * rx clear is stuck at low for more than 100ms
> * dcu chain and complete state is stuck.
>
> This patch detects the stuck state if the beacons are not received for
> more than 300ms. In the above matching conditions, trigger a chip
> reset to recover. This issue was originally reported in 3.0 kernel with
> AR9382 chip by having two stations associated with two different APs in
> the same channel and was attenuated/controlled by Azimuth ADEPT-n box.
>
> Cc: Paul Stewart<[email protected]>
> Reported-by: Gary Morain<[email protected]>
> Signed-off-by: Rajkumar Manoharan<[email protected]>

Some more comments below.....


> +static bool ath9k_check_dcu_chain_state(u32 dma_dbg, int max_limit,
> + int *hang_state, int *hang_pos)
> +{
> + static u32 hang_sign[] = {5, 6, 9};
> + u32 chain_state, dcs_pos, i;
> +
> + for (dcs_pos = 0; dcs_pos< max_limit; dcs_pos++) {
> + chain_state = (dma_dbg>> (5 * dcs_pos))& 0x1f;
> + for (i = 0; i< 3; i++) {
> + if (chain_state == hang_sign[i]) {
> + *hang_state = chain_state;
> + *hang_pos = dcs_pos;
> + return true;
> + }
> + }
> + }
> + return false;
> +}

Perhaps you could add some comments to the code above to describe
what the '5, 6, 9' and other constants mean?

> +#define DCU_COMPLETE_STATE 1
> +#define NUM_STATUS_READS 50
> +static bool ath9k_detect_mac_hang(struct ath_hw *ah)
> +{
> + u32 chain_state, comp_state, dcs_reg = AR_DMADBG_4;
> + u32 i, hang_pos, hang_state;
> +
> + comp_state = REG_READ(ah, AR_DMADBG_6);
> +
> + if ((comp_state& 0x3) != DCU_COMPLETE_STATE) {
> + ath_dbg(ath9k_hw_common(ah), RESET,
> + "MAC Hang signature not found at DCU complete\n");
> + return false;
> + }

Same with the 0x3 (maybe #define what those bits mean and or them together instead
of using the 0x3?)

> +
> + chain_state = REG_READ(ah, dcs_reg);
> + if (ath9k_check_dcu_chain_state(chain_state, 6,&hang_state,&hang_pos))
> + goto hang_check_iter;

And why did you choose a '6' here?

> +
> + dcs_reg = AR_DMADBG_5;
> + chain_state = REG_READ(ah, dcs_reg);
> + if (ath9k_check_dcu_chain_state(chain_state, 4,&hang_state,&hang_pos))
> + goto hang_check_iter;

And the '4'?

> +void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> +{
> + if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> + return;
> +
> + if (!(sc->sc_flags& SC_OP_PRIM_STA_VIF))
> + return;
> +
> + mod_timer(&sc->rx_poll_timer, jiffies + msecs_to_jiffies(msec));
> +}

Since the rx-poll timer isn't started when SC_OP_PRIM_STA_VIF is
not set, should you always call the ath_start_rx_poll method
even if ani is disabled in the ath9k_bss_iter method since
the PRIM_STA_VIF flag is set earlier in that code)?

> static int ath9k_add_interface(struct ieee80211_hw *hw,
> struct ieee80211_vif *vif)
> @@ -1948,6 +2086,8 @@ static void ath9k_bss_iter(void *data, u8 *mac, struct ieee80211_vif *vif)
> if (!common->disable_ani) {
> sc->sc_flags |= SC_OP_ANI_RUN;
> ath_start_ani(common);
> + sc->rx.stop_rx_poll = false;
> + ath_start_rx_poll(sc, 300);
> }
>
> }

Thanks,
Ben

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com


2012-02-02 04:56:14

by Sujith Manoharan

[permalink] [raw]
Subject: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

Rajkumar Manoharan wrote:
> + bool stop_rx_poll;

What exactly is this variable for ?
Why can't the timer be just started/stopped/modified using the
normal timer API ?

And Ben has a point too. Different beacon intervals need to be handled,
which can be done easily by just using the timestamp of the last received
beacon and the beacon interval. Then all this fragile logic can be removed.

> + if (!(sc->sc_flags & SC_OP_PRIM_STA_VIF))
> + return;

This needs locking, no ?

> + sc->rx.stop_rx_poll = false;
> + spin_unlock_bh(&sc->rx.rxbuflock);
> + sc->ps_flags |= PS_WAIT_FOR_BEACON;

PS_WAIT_FOR_BEACON is being used randomly everywhere, with no proper locking,
but this needs to be handled properly here, else station mode powersave would be buried
under another layer of poo.

Sujith

2012-02-10 00:55:08

by Adrian Chadd

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

Hi,

I've done some digging here. Yes, you're right, the big clobber isn't
the right solution.

What I've been told is:

* with such a strong blocker and a weak signal (~ 5dB maximum above
blocker) that the strong signal detection logic won't fire;
* and if weak signal detection is disabled entirely as it's done at
the moment, you will likely decode very little.

So the solution of disabling sfcorr_low is right (as that's supposed
to be for super-low signals and in the presence of such a blocker, it
won't ever help) and leaving the first level low threshold detection
is "right".

I still can't help but wonder if there's a situation where the big
"disable" is needed.

Eg, in the presence of a very strong signal (ie, where strong signal
detection will fire) but a weak blocker; you may wish to disable both
so you don't end up with high levels of spurious weak signal
detections and possibly OFDM restarts (ie, the hardware would then
detect an increase in power during a weak OFDM frame decode -> OFDM
restart.)

So how about we treat this as an excuse to turn OFDM weak signal
detection into three levels - entirely on, disable low weak detect,
disable medium weak detect - and see if this is enough. I'd like to
not break things for existing users where the "big thunk" is good
enough.

What do you think?


Adrian

2012-02-02 22:09:11

by Adrian Chadd

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

Hi, questions below:

On 1 February 2012 08:05, Rajkumar Manoharan <[email protected]> wrote:
>
> This patch improves ANI operations by switching among the immunity
> levels based on PHY errors and beacon rssi which will adjust receiver
> desensitizing parameters. The changes are

What environments are you seeing this bad behaviour in? I'd just like
to be sure that this won't mess up the behaviour of ANI for other
chips, other environments and other operating modes.

> * Irrespective of opmode, the Weak Signal Detection is with the current
> ?immunity level value.

I didn't think that weak signal detection was a single bit in the PHY.
It looks like you've just deleted all the threshold settings and are
turning on/off using low self correlation.

Have a read of the (very early, AR5212 era?) ANI patent:

http://www.freepatentsonline.com/7349503.pdf

If this has changed (ie, if in Osprey it's done differently) then it
should likely be documented in the driver somewhere.

IF this all works then great, but I'd really like to try and better
understand/document the "why" aspects of it. ANI is .. fiddly, and
there aren't any tools available for ath9k (+freebsd) wireless
developers to really understand the impact of ANI.

Thanks,



Adrian

2012-02-01 16:05:27

by Rajkumar Manoharan

[permalink] [raw]
Subject: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

This patch improves ANI operations by switching among the immunity
levels based on PHY errors and beacon rssi which will adjust receiver
desensitizing parameters. The changes are

* Irrespective of opmode, the Weak Signal Detection is with the current
immunity level value.
* At highest OFDM immunity level poor performance was observed with
strong interference. By tuning the FIR step and spur immunity levels
and not changing any weak signal detection thresholds at any level
helped to improve the performance.
* ANI took long time to recover back to lower immunity levels on heavy
data load. As the listen time got reset to zero before reaching to
the 5x of aniperiod, the immunity level is not lowering back even
without any interference. This patch fix that.
* Reset ANI values to default on disconnection.

Cc: Paul Stewart <[email protected]>
Cc: Susinder Gulasekaran <[email protected]>
Signed-off-by: Suresh Chandrasekaran <[email protected]>
Signed-off-by: Rajkumar Manoharan <[email protected]>
---
drivers/net/wireless/ath/ath9k/ani.c | 50 +++++++++++++--------------
drivers/net/wireless/ath/ath9k/ani.h | 6 ++-
drivers/net/wireless/ath/ath9k/ar5008_phy.c | 38 --------------------
drivers/net/wireless/ath/ath9k/ar9003_phy.c | 49 --------------------------
drivers/net/wireless/ath/ath9k/main.c | 1 +
5 files changed, 29 insertions(+), 115 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ani.c b/drivers/net/wireless/ath/ath9k/ani.c
index bc56f57..516e29f 100644
--- a/drivers/net/wireless/ath/ath9k/ani.c
+++ b/drivers/net/wireless/ath/ath9k/ani.c
@@ -46,8 +46,8 @@ static const struct ani_ofdm_level_entry ofdm_level_table[] = {
{ 5, 4, 1 }, /* lvl 5 */
{ 6, 5, 1 }, /* lvl 6 */
{ 7, 6, 1 }, /* lvl 7 */
- { 7, 7, 1 }, /* lvl 8 */
- { 7, 8, 0 } /* lvl 9 */
+ { 7, 6, 0 }, /* lvl 8 */
+ { 7, 7, 0 } /* lvl 9 */
};
#define ATH9K_ANI_OFDM_NUM_LEVEL \
ARRAY_SIZE(ofdm_level_table)
@@ -91,8 +91,8 @@ static const struct ani_cck_level_entry cck_level_table[] = {
{ 4, 0 }, /* lvl 4 */
{ 5, 0 }, /* lvl 5 */
{ 6, 0 }, /* lvl 6 */
- { 7, 0 }, /* lvl 7 (only for high rssi) */
- { 8, 0 } /* lvl 8 (only for high rssi) */
+ { 6, 0 }, /* lvl 7 (only for high rssi) */
+ { 7, 0 } /* lvl 8 (only for high rssi) */
};

#define ATH9K_ANI_CCK_NUM_LEVEL \
@@ -290,16 +290,9 @@ static void ath9k_hw_set_ofdm_nil(struct ath_hw *ah, u8 immunityLevel)
ATH9K_ANI_FIRSTEP_LEVEL,
entry_ofdm->fir_step_level);

- if ((ah->opmode != NL80211_IFTYPE_STATION &&
- ah->opmode != NL80211_IFTYPE_ADHOC) ||
- aniState->noiseFloor <= aniState->rssiThrHigh) {
- if (aniState->ofdmWeakSigDetectOff)
- /* force on ofdm weak sig detect */
- ath9k_hw_ani_control(ah,
- ATH9K_ANI_OFDM_WEAK_SIGNAL_DETECTION,
- true);
- else if (aniState->ofdmWeakSigDetectOff ==
- entry_ofdm->ofdm_weak_signal_on)
+ if ((aniState->noiseFloor >= aniState->rssiThrHigh) &&
+ (aniState->ofdmWeakSigDetectOff !=
+ entry_ofdm->ofdm_weak_signal_on)) {
ath9k_hw_ani_control(ah,
ATH9K_ANI_OFDM_WEAK_SIGNAL_DETECTION,
entry_ofdm->ofdm_weak_signal_on);
@@ -717,26 +710,30 @@ void ath9k_hw_ani_monitor(struct ath_hw *ah, struct ath9k_channel *chan)
ofdmPhyErrRate, aniState->cckNoiseImmunityLevel,
cckPhyErrRate, aniState->ofdmsTurn);

- if (aniState->listenTime > 5 * ah->aniperiod) {
- if (ofdmPhyErrRate <= ah->config.ofdm_trig_low &&
- cckPhyErrRate <= ah->config.cck_trig_low) {
+ if (aniState->listenTime > ah->aniperiod) {
+ if (cckPhyErrRate < ah->config.cck_trig_low &&
+ ((ofdmPhyErrRate < ah->config.ofdm_trig_low &&
+ aniState->ofdmNoiseImmunityLevel <
+ ATH9K_ANI_OFDM_DEF_LEVEL) ||
+ (ofdmPhyErrRate < ATH9K_ANI_OFDM_TRIG_LOW_ABOVE_INI &&
+ aniState->ofdmNoiseImmunityLevel >=
+ ATH9K_ANI_OFDM_DEF_LEVEL))) {
ath9k_hw_ani_lower_immunity(ah);
aniState->ofdmsTurn = !aniState->ofdmsTurn;
- }
- ath9k_ani_restart(ah);
- } else if (aniState->listenTime > ah->aniperiod) {
- /* check to see if need to raise immunity */
- if (ofdmPhyErrRate > ah->config.ofdm_trig_high &&
- (cckPhyErrRate <= ah->config.cck_trig_high ||
- aniState->ofdmsTurn)) {
+ } else if ((ofdmPhyErrRate > ah->config.ofdm_trig_high &&
+ aniState->ofdmNoiseImmunityLevel >=
+ ATH9K_ANI_OFDM_DEF_LEVEL) ||
+ (ofdmPhyErrRate >
+ ATH9K_ANI_OFDM_TRIG_HIGH_BELOW_INI &&
+ aniState->ofdmNoiseImmunityLevel <
+ ATH9K_ANI_OFDM_DEF_LEVEL)) {
ath9k_hw_ani_ofdm_err_trigger(ah);
- ath9k_ani_restart(ah);
aniState->ofdmsTurn = false;
} else if (cckPhyErrRate > ah->config.cck_trig_high) {
ath9k_hw_ani_cck_err_trigger(ah);
- ath9k_ani_restart(ah);
aniState->ofdmsTurn = true;
}
+ ath9k_ani_restart(ah);
}
}
EXPORT_SYMBOL(ath9k_hw_ani_monitor);
@@ -911,3 +908,4 @@ void ath9k_hw_ani_init(struct ath_hw *ah)
ath9k_ani_restart(ah);
ath9k_enable_mib_counters(ah);
}
+EXPORT_SYMBOL(ath9k_hw_ani_init);
diff --git a/drivers/net/wireless/ath/ath9k/ani.h b/drivers/net/wireless/ath/ath9k/ani.h
index 83029d6..72e2b87 100644
--- a/drivers/net/wireless/ath/ath9k/ani.h
+++ b/drivers/net/wireless/ath/ath9k/ani.h
@@ -25,11 +25,13 @@

/* units are errors per second */
#define ATH9K_ANI_OFDM_TRIG_HIGH_OLD 500
-#define ATH9K_ANI_OFDM_TRIG_HIGH_NEW 1000
+#define ATH9K_ANI_OFDM_TRIG_HIGH_NEW 3500
+#define ATH9K_ANI_OFDM_TRIG_HIGH_BELOW_INI 1000

/* units are errors per second */
#define ATH9K_ANI_OFDM_TRIG_LOW_OLD 200
#define ATH9K_ANI_OFDM_TRIG_LOW_NEW 400
+#define ATH9K_ANI_OFDM_TRIG_LOW_ABOVE_INI 900

/* units are errors per second */
#define ATH9K_ANI_CCK_TRIG_HIGH_OLD 200
@@ -53,7 +55,7 @@
#define ATH9K_ANI_RSSI_THR_LOW 7

#define ATH9K_ANI_PERIOD_OLD 100
-#define ATH9K_ANI_PERIOD_NEW 1000
+#define ATH9K_ANI_PERIOD_NEW 300

/* in ms */
#define ATH9K_ANI_POLLINTERVAL_OLD 100
diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index f901a17..51d3b7f 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -1079,46 +1079,8 @@ static bool ar5008_hw_ani_control_old(struct ath_hw *ah,
break;
}
case ATH9K_ANI_OFDM_WEAK_SIGNAL_DETECTION:{
- static const int m1ThreshLow[] = { 127, 50 };
- static const int m2ThreshLow[] = { 127, 40 };
- static const int m1Thresh[] = { 127, 0x4d };
- static const int m2Thresh[] = { 127, 0x40 };
- static const int m2CountThr[] = { 31, 16 };
- static const int m2CountThrLow[] = { 63, 48 };
u32 on = param ? 1 : 0;

- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M1_THRESH_LOW,
- m1ThreshLow[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M2_THRESH_LOW,
- m2ThreshLow[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M1_THRESH,
- m1Thresh[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M2_THRESH,
- m2Thresh[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M2COUNT_THR,
- m2CountThr[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M2COUNT_THR_LOW,
- m2CountThrLow[on]);
-
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M1_THRESH_LOW,
- m1ThreshLow[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M2_THRESH_LOW,
- m2ThreshLow[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M1_THRESH,
- m1Thresh[on]);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M2_THRESH,
- m2Thresh[on]);
-
if (on)
REG_SET_BIT(ah, AR_PHY_SFCORR_LOW,
AR_PHY_SFCORR_LOW_USE_SELF_CORR_LOW);
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
index 2b0bfb8..677b912 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
@@ -824,55 +824,6 @@ static bool ar9003_hw_ani_control(struct ath_hw *ah,
* on == 0 means more noise imm
*/
u32 on = param ? 1 : 0;
- /*
- * make register setting for default
- * (weak sig detect ON) come from INI file
- */
- int m1ThreshLow = on ?
- aniState->iniDef.m1ThreshLow : m1ThreshLow_off;
- int m2ThreshLow = on ?
- aniState->iniDef.m2ThreshLow : m2ThreshLow_off;
- int m1Thresh = on ?
- aniState->iniDef.m1Thresh : m1Thresh_off;
- int m2Thresh = on ?
- aniState->iniDef.m2Thresh : m2Thresh_off;
- int m2CountThr = on ?
- aniState->iniDef.m2CountThr : m2CountThr_off;
- int m2CountThrLow = on ?
- aniState->iniDef.m2CountThrLow : m2CountThrLow_off;
- int m1ThreshLowExt = on ?
- aniState->iniDef.m1ThreshLowExt : m1ThreshLowExt_off;
- int m2ThreshLowExt = on ?
- aniState->iniDef.m2ThreshLowExt : m2ThreshLowExt_off;
- int m1ThreshExt = on ?
- aniState->iniDef.m1ThreshExt : m1ThreshExt_off;
- int m2ThreshExt = on ?
- aniState->iniDef.m2ThreshExt : m2ThreshExt_off;
-
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M1_THRESH_LOW,
- m1ThreshLow);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M2_THRESH_LOW,
- m2ThreshLow);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M1_THRESH, m1Thresh);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M2_THRESH, m2Thresh);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR,
- AR_PHY_SFCORR_M2COUNT_THR, m2CountThr);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_LOW,
- AR_PHY_SFCORR_LOW_M2COUNT_THR_LOW,
- m2CountThrLow);
-
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M1_THRESH_LOW, m1ThreshLowExt);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M2_THRESH_LOW, m2ThreshLowExt);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M1_THRESH, m1ThreshExt);
- REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT,
- AR_PHY_SFCORR_EXT_M2_THRESH, m2ThreshExt);

if (on)
REG_SET_BIT(ah, AR_PHY_SFCORR_LOW,
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index b4322cb..d388034 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2126,6 +2126,7 @@ static void ath9k_config_bss(struct ath_softc *sc, struct ieee80211_vif *vif)
del_timer_sync(&common->ani.timer);
del_timer_sync(&sc->rx_poll_timer);
memset(&sc->caldata, 0, sizeof(sc->caldata));
+ ath9k_hw_ani_init(sc->sc_ah);
}
}

--
1.7.9


2012-02-04 10:30:29

by Adrian Chadd

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On 2 February 2012 22:04, Rajkumar Manoharan <[email protected]> wrote:
>> > Ok. But this stuff really needs to be verified for other chips.
>> > These 'poll' workarounds are piling up.
>> >
>>
>> Hi,
>>
>> Is there a bugzilla.kernel.org bug for this particular behaviour? And if
>> you can dig up the internal bug number, please ?post it privately.
>>
> No. This issue was originally reported by Google in their setup.

Can we please ensure that a bugzilla.kernel.org bug report is created,
at the very least?

Thanks,


Adrian

2012-02-02 04:09:45

by Sujith Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

Rajkumar Manoharan wrote:
> On Thu, Feb 02, 2012 at 08:38:32AM +0530, Sujith Manoharan wrote:
> > Rajkumar Manoharan wrote:
> > > +void ath_start_rx_poll(struct ath_softc *sc, const u32 msec)
> > > +{
> > > + if (!AR_SREV_9300_20_OR_LATER(sc->sc_ah))
> > > + return;
> >
> > I have not reviewed the patch, but to do such workarounds for an entire
> > family of chips is overkill. Has it been confirmed that the other
> > chips falling under this SREV check require this snafu ?
> >
> unfortunately this snafu is applicable for all ar9380 MAC based chips ;)
> and confirmed the same.

Ok. But this stuff really needs to be verified for other chips.
These 'poll' workarounds are piling up.

Sujith

2012-02-01 18:49:47

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On Wed, Feb 01, 2012 at 09:31:50AM -0800, Ben Greear wrote:
> On 02/01/2012 08:05 AM, Rajkumar Manoharan wrote:
> >In the following scenario, where the distance b/w STA and AP is ~10m
> >or in a shield environment by placing an attenuator with reduced AP
> >txpower, the station started reporting with beacon loss and got
> >disconnected whenever the chariot endpoint was initiated with BiDi
> >traffic. In such state, two different stuck cases were observed.
> >
> > * rx clear is stuck at low for more than 100ms
> > * dcu chain and complete state is stuck.
> >
> >This patch detects the stuck state if the beacons are not received for
> >more than 300ms. In the above matching conditions, trigger a chip
> >reset to recover. This issue was originally reported in 3.0 kernel with
> >AR9382 chip by having two stations associated with two different APs in
> >the same channel and was attenuated/controlled by Azimuth ADEPT-n box.
>
> Can't the AP be configured for larger beacon intervals? Maybe have it
> be some multiple of that instead of a fixed 300ms?
>
Yes. It can. But the detection log will look for specific signature and
meanwhile if the beacons are received, the inprogress logic will be aborted.
I believe that 300ms is not too short or too long to trigger the detection.
Isn't it?

-Rajkumar

2012-02-07 21:41:09

by Adrian Chadd

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

On 5 February 2012 23:10, Rajkumar Manoharan <[email protected]> wrote:

>> I'd like to suggest we hold off on committing anything that really
>> changes the ANI behaviour without figuring out exactly what's going on
>> underneath the hood. I think it's worthwhile filing a bugzilla report
>> and stuffing that patch against the bug though, so it's not "lost".
>>

[snip]

> To be very precise, there are two types of weak signal detection logic in our
> baseband -
>
> 1) normal weak signal detection logic - involves thresholds m1_thresh,
> ? m2_thresh, m2_Count_thr
> 2) low weak signal detection logic - involves thresholds m1_thresh_low,
> ? m2_thresh_low, m2_count_thr_low
>
> Type 2 is intended to handle low weak signals and internally baseband logic does
> averaging over larger time windows compared to type 1. In current ANI code,
> both types 1) and 2) are clubbed together and called as "weak signal detection".
>
> What we've found in extensive testing is that disabling type 1 and type 2 form of
> weak signal detection logic together causes "beacon miss". Disabling both types
> 1 and type 2 together is definitely harmful to our receiver as it cannot receive
> even medium strength signals. Disabling type 2 alone is completely safe which
> is what we're doing in this change.
>
> For disabling type 2 logic alone there is a register field named
> "use_self_corr_low". So instead of maxing out threshold values of 3-4 registers
> we simply set use_self_corr_low=0 saving register writes. Hepe this could
> clarify your question.

This does, yes. I'll run this by the baseband guys just to see which
chips this applies to and I'll get back to you.

How'd you determine that it was these weak signal detection
parameters? I've been thinking about writing an ANI (well, "baseband")
visualisation tool for FreeBSD that just watches the error rates and a
few other registers and plots things. That way we could watch how
effective ANI is being over time.

Thanks very much for chasing this up!


Adrian

2012-02-01 17:32:02

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH 1/2] ath9k: recover the chip from tx/rx stuck

On 02/01/2012 08:05 AM, Rajkumar Manoharan wrote:
> In the following scenario, where the distance b/w STA and AP is ~10m
> or in a shield environment by placing an attenuator with reduced AP
> txpower, the station started reporting with beacon loss and got
> disconnected whenever the chariot endpoint was initiated with BiDi
> traffic. In such state, two different stuck cases were observed.
>
> * rx clear is stuck at low for more than 100ms
> * dcu chain and complete state is stuck.
>
> This patch detects the stuck state if the beacons are not received for
> more than 300ms. In the above matching conditions, trigger a chip
> reset to recover. This issue was originally reported in 3.0 kernel with
> AR9382 chip by having two stations associated with two different APs in
> the same channel and was attenuated/controlled by Azimuth ADEPT-n box.

Can't the AP be configured for larger beacon intervals? Maybe have it
be some multiple of that instead of a fixed 300ms?

Thanks,
Ben

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com


2012-02-03 05:56:31

by Rajkumar Manoharan

[permalink] [raw]
Subject: Re: [PATCH 2/2] ath9k_hw: improve ANI processing and rx desensitizing parameters

On Thu, Feb 02, 2012 at 02:09:10PM -0800, Adrian Chadd wrote:
> Hi, questions below:
>
> On 1 February 2012 08:05, Rajkumar Manoharan <[email protected]> wrote:
> >
> > This patch improves ANI operations by switching among the immunity
> > levels based on PHY errors and beacon rssi which will adjust receiver
> > desensitizing parameters. The changes are
>
> What environments are you seeing this bad behaviour in? I'd just like
> to be sure that this won't mess up the behaviour of ANI for other
> chips, other environments and other operating modes.
>
The actual test was done in AP mode in controlled environment with rx signal
level of -50 dBm and with interference of -45 dBm at STA side. In such scenario
beacon miss was observered and drastic hit in throughput at OFDM ani level 9 after
enabling the Weak signal detection. Not changing the any of weak signal detection
thresholds and retaining the ini values improved the throughput at higher ANI
levels.
> > * Irrespective of opmode, the Weak Signal Detection is with the current
> > ?immunity level value.
>
Again with AP mode enabling/disabling the Weak signal detection is not working.
It was verified from the register (use_self_corr_low). So that it will
configure the WSD with current ofdm immunity level, if there is mismatch b/w
configured WSD state and selected immunity level's WSD state.

> I didn't think that weak signal detection was a single bit in the PHY.
> It looks like you've just deleted all the threshold settings and are
> turning on/off using low self correlation.
>
As menstion above, the thresh values are continue to retain the init values.
Based on the test results, it is enough to update use_self_corr_low state.

> Have a read of the (very early, AR5212 era?) ANI patent:
>
> http://www.freepatentsonline.com/7349503.pdf
>
> If this has changed (ie, if in Osprey it's done differently) then it
> should likely be documented in the driver somewhere.
>
> IF this all works then great, but I'd really like to try and better
> understand/document the "why" aspects of it. ANI is .. fiddly, and
> there aren't any tools available for ath9k (+freebsd) wireless
> developers to really understand the impact of ANI.
>
--
Rajkumar