LinuxLists.cc - [PATCH v2] wlcore: use separate HW queue for each AC in each vif

2012-11-29 22:48:09

Subject: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

Start using the new hw_queue mechanism in mac80211 and give each AC in
each vif its own hw_queue number. This allows us to stop an AC in a vif
independently from other vifs.

Change the Tx watermark handling functions to count packets per AC in
vif. From now on fast links should not be able to hurt the throughput
of slow links on the same AC but on different vifs.

Change internal queue mgmt functions to operate per vif, to support the
new Tx watermark granularity. Make the global versions of the queue
stop/start functions to use the global mac80211 API for queue mgmt. This
helps in situations where the driver currently doesn't know all the vifs
that reside in mac80211. Recovery is a good example for such a case.

Signed-off-by: Arik Nemtsov <[email protected]>
---
v2: move to new vif iterator API

drivers/net/wireless/ti/wlcore/main.c | 101 +++++++++++++++++++++++++--
drivers/net/wireless/ti/wlcore/tx.c | 107 +++++++++++++++++------------
drivers/net/wireless/ti/wlcore/tx.h | 15 ++--
drivers/net/wireless/ti/wlcore/wlcore.h | 3 +-
drivers/net/wireless/ti/wlcore/wlcore_i.h | 9 +++
5 files changed, 175 insertions(+), 60 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 15631fa..c9fb441 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1223,8 +1223,8 @@ static void wl1271_op_tx(struct ieee80211_hw *hw,
*/
if (hlid == WL12XX_INVALID_LINK_ID ||
(!test_bit(hlid, wlvif->links_map)) ||
- (wlcore_is_queue_stopped(wl, q) &&
- !wlcore_is_queue_stopped_by_reason(wl, q,
+ (wlcore_is_queue_stopped(wl, wlvif, q) &&
+ !wlcore_is_queue_stopped_by_reason(wl, wlvif, q,
WLCORE_QUEUE_STOP_REASON_WATERMARK))) {
wl1271_debug(DEBUG_TX, "DROP skb hlid %d q %d", hlid, q);
ieee80211_free_txskb(hw, skb);
@@ -1242,11 +1242,11 @@ static void wl1271_op_tx(struct ieee80211_hw *hw,
* The workqueue is slow to process the tx_queue and we need stop
* the queue here, otherwise the queue will get too long.
*/
- if (wl->tx_queue_count[q] >= WL1271_TX_QUEUE_HIGH_WATERMARK &&
- !wlcore_is_queue_stopped_by_reason(wl, q,
+ if (wlvif->tx_queue_count[q] >= WL1271_TX_QUEUE_HIGH_WATERMARK &&
+ !wlcore_is_queue_stopped_by_reason(wl, wlvif, q,
WLCORE_QUEUE_STOP_REASON_WATERMARK)) {
wl1271_debug(DEBUG_TX, "op_tx: stopping queues for q %d", q);
- wlcore_stop_queue_locked(wl, q,
+ wlcore_stop_queue_locked(wl, wlvif, q,
WLCORE_QUEUE_STOP_REASON_WATERMARK);
}

@@ -2313,6 +2313,81 @@ static void wl12xx_force_active_psm(struct wl1271 *wl)
}
}

+struct wlcore_hw_queue_iter_data {
+ unsigned long hw_queue_map[BITS_TO_LONGS(WLCORE_NUM_MAC_ADDRESSES)];
+ /* current vif */
+ struct ieee80211_vif *vif;
+ /* is the current vif among those iterated */
+ bool cur_running;
+};
+
+static void wlcore_hw_queue_iter(void *data, u8 *mac,
+ struct ieee80211_vif *vif)
+{
+ struct wlcore_hw_queue_iter_data *iter_data = data;
+
+ if (WARN_ON(vif->hw_queue[0] == IEEE80211_INVAL_HW_QUEUE))
+ return;
+
+ if (iter_data->cur_running || vif == iter_data->vif) {
+ iter_data->cur_running = true;
+ return;
+ }
+
+ __set_bit(vif->hw_queue[0] / NUM_TX_QUEUES, iter_data->hw_queue_map);
+}
+
+static int wlcore_allocate_hw_queue_base(struct wl1271 *wl,
+ struct wl12xx_vif *wlvif)
+{
+ struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif);
+ struct wlcore_hw_queue_iter_data iter_data = {};
+ int i, q_base;
+
+ iter_data.vif = vif;
+
+ /* mark all bits taken by active interfaces */
+ ieee80211_iterate_active_interfaces_atomic(wl->hw,
+ IEEE80211_IFACE_ITER_RESUME_ALL,
+ wlcore_hw_queue_iter, &iter_data);
+
+ /* the current vif is already running in mac80211 (resume/recovery) */
+ if (iter_data.cur_running) {
+ wlvif->hw_queue_base = vif->hw_queue[0];
+ wl1271_debug(DEBUG_MAC80211,
+ "using pre-allocated hw queue base %d",
+ wlvif->hw_queue_base);
+
+ /* interface type might have changed type */
+ goto adjust_cab_queue;
+ }
+
+ q_base = find_first_zero_bit(iter_data.hw_queue_map,
+ WLCORE_NUM_MAC_ADDRESSES);
+ if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
+ return -EBUSY;
+
+ wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
+ wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
+ wlvif->hw_queue_base);
+
+ for (i = 0; i < NUM_TX_QUEUES; i++) {
+ wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
+ /* register hw queues in mac80211 */
+ vif->hw_queue[i] = wlvif->hw_queue_base + i;
+ }
+
+adjust_cab_queue:
+ /* the last places are reserved for cab queues per interface */
+ if (wlvif->bss_type == BSS_TYPE_AP_BSS)
+ vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
+ wlvif->hw_queue_base / NUM_TX_QUEUES;
+ else
+ vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
+
+ return 0;
+}
+
static int wl1271_op_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
@@ -2364,6 +2439,10 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw,
goto out;
}

+ ret = wlcore_allocate_hw_queue_base(wl, wlvif);
+ if (ret < 0)
+ goto out;
+
if (wl12xx_need_fw_change(wl, vif_count, true)) {
wl12xx_force_active_psm(wl);
set_bit(WL1271_FLAG_INTENDED_FW_RECOVERY, &wl->flags);
@@ -5592,7 +5671,8 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
IEEE80211_HW_AP_LINK_PS |
IEEE80211_HW_AMPDU_AGGREGATION |
IEEE80211_HW_TX_AMPDU_SETUP_IN_HW |
- IEEE80211_HW_SCAN_WHILE_IDLE;
+ IEEE80211_HW_SCAN_WHILE_IDLE |
+ IEEE80211_HW_QUEUE_CONTROL;

wl->hw->wiphy->cipher_suites = cipher_suites;
wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
@@ -5659,7 +5739,14 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
wl->hw->wiphy->bands[IEEE80211_BAND_5GHZ] =
&wl->bands[IEEE80211_BAND_5GHZ];

- wl->hw->queues = 4;
+ /*
+ * allow 4 queues per mac address we support +
+ * 1 cab queue per mac + one global offchannel Tx queue
+ */
+ wl->hw->queues = (NUM_TX_QUEUES + 1) * WLCORE_NUM_MAC_ADDRESSES + 1;
+
+ /* the last queue is the offchannel queue */
+ wl->hw->offchannel_tx_hw_queue = wl->hw->queues - 1;
wl->hw->max_rates = 1;

wl->hw->wiphy->reg_notifier = wl1271_reg_notify;
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index d799fc9..dbc1721 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -447,14 +447,17 @@ u32 wl1271_tx_enabled_rates_get(struct wl1271 *wl, u32 rate_set,
void wl1271_handle_tx_low_watermark(struct wl1271 *wl)
{
int i;
+ struct wl12xx_vif *wlvif;

- for (i = 0; i < NUM_TX_QUEUES; i++) {
- if (wlcore_is_queue_stopped_by_reason(wl, i,
- WLCORE_QUEUE_STOP_REASON_WATERMARK) &&
- wl->tx_queue_count[i] <= WL1271_TX_QUEUE_LOW_WATERMARK) {
- /* firmware buffer has space, restart queues */
- wlcore_wake_queue(wl, i,
- WLCORE_QUEUE_STOP_REASON_WATERMARK);
+ wl12xx_for_each_wlvif(wl, wlvif) {
+ for (i = 0; i < NUM_TX_QUEUES; i++) {
+ if (wlcore_is_queue_stopped_by_reason(wl, wlvif, i,
+ WLCORE_QUEUE_STOP_REASON_WATERMARK) &&
+ wlvif->tx_queue_count[i] <=
+ WL1271_TX_QUEUE_LOW_WATERMARK)
+ /* firmware buffer has space, restart queues */
+ wlcore_wake_queue(wl, wlvif, i,
+ WLCORE_QUEUE_STOP_REASON_WATERMARK);
}
}
}
@@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
}
EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);

-void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
- enum wlcore_queue_stop_reason reason)
+void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
+ u8 queue, enum wlcore_queue_stop_reason reason)
{
- bool stopped = !!wl->queue_stop_reasons[queue];
+ int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
+ bool stopped = !!wl->queue_stop_reasons[hwq];

/* queue should not be stopped for this reason */
- WARN_ON(test_and_set_bit(reason, &wl->queue_stop_reasons[queue]));
+ WARN_ON(test_and_set_bit(reason, &wl->queue_stop_reasons[hwq]));

if (stopped)
return;

- ieee80211_stop_queue(wl->hw, wl1271_tx_get_mac80211_queue(queue));
+ ieee80211_stop_queue(wl->hw, hwq);
}

-void wlcore_stop_queue(struct wl1271 *wl, u8 queue,
+void wlcore_stop_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
enum wlcore_queue_stop_reason reason)
{
unsigned long flags;

spin_lock_irqsave(&wl->wl_lock, flags);
- wlcore_stop_queue_locked(wl, queue, reason);
+ wlcore_stop_queue_locked(wl, wlvif, queue, reason);
spin_unlock_irqrestore(&wl->wl_lock, flags);
}

-void wlcore_wake_queue(struct wl1271 *wl, u8 queue,
+void wlcore_wake_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
enum wlcore_queue_stop_reason reason)
{
unsigned long flags;
+ int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);

spin_lock_irqsave(&wl->wl_lock, flags);
-
/* queue should not be clear for this reason */
- WARN_ON(!test_and_clear_bit(reason, &wl->queue_stop_reasons[queue]));
+ WARN_ON(!test_and_clear_bit(reason, &wl->queue_stop_reasons[hwq]));

- if (wl->queue_stop_reasons[queue])
+ if (wl->queue_stop_reasons[hwq])
goto out;

- ieee80211_wake_queue(wl->hw, wl1271_tx_get_mac80211_queue(queue));
+ ieee80211_wake_queue(wl->hw, hwq);

out:
spin_unlock_irqrestore(&wl->wl_lock, flags);
@@ -1235,49 +1239,62 @@ out:
void wlcore_stop_queues(struct wl1271 *wl,
enum wlcore_queue_stop_reason reason)
{
- int i;
+ int i, q_base;
+ unsigned long flags;

- for (i = 0; i < NUM_TX_QUEUES; i++)
- wlcore_stop_queue(wl, i, reason);
-}
-EXPORT_SYMBOL_GPL(wlcore_stop_queues);
+ spin_lock_irqsave(&wl->wl_lock, flags);

-void wlcore_wake_queues(struct wl1271 *wl,
- enum wlcore_queue_stop_reason reason)
-{
- int i;
+ for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
+ for (i = 0; i < NUM_TX_QUEUES; i++) {
+ int h = q_base * NUM_TX_QUEUES + i;
+ WARN_ON(test_and_set_bit(reason,
+ &wl->queue_stop_reasons[h]));
+ }

- for (i = 0; i < NUM_TX_QUEUES; i++)
- wlcore_wake_queue(wl, i, reason);
+ /*
+ * use the global version to make sure all vifs in mac80211 we don't
+ * know are stopped.
+ */
+ ieee80211_stop_queues(wl->hw);
+
+ spin_unlock_irqrestore(&wl->wl_lock, flags);
}
-EXPORT_SYMBOL_GPL(wlcore_wake_queues);

-void wlcore_reset_stopped_queues(struct wl1271 *wl)
+void wlcore_wake_queues(struct wl1271 *wl,
+ enum wlcore_queue_stop_reason reason)
{
- int i;
+ int i, q_base;
unsigned long flags;

spin_lock_irqsave(&wl->wl_lock, flags);

- for (i = 0; i < NUM_TX_QUEUES; i++) {
- if (!wl->queue_stop_reasons[i])
- continue;
+ for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
+ for (i = 0; i < NUM_TX_QUEUES; i++) {
+ int h = q_base * NUM_TX_QUEUES + i;
+ WARN_ON(!test_and_clear_bit(reason,
+ &wl->queue_stop_reasons[h]));
+ }

- wl->queue_stop_reasons[i] = 0;
- ieee80211_wake_queue(wl->hw,
- wl1271_tx_get_mac80211_queue(i));
- }
+ /*
+ * use the global version to make sure all vifs in mac80211 we don't
+ * know are woken up.
+ */
+ ieee80211_wake_queues(wl->hw);

spin_unlock_irqrestore(&wl->wl_lock, flags);
}

-bool wlcore_is_queue_stopped_by_reason(struct wl1271 *wl, u8 queue,
- enum wlcore_queue_stop_reason reason)
+bool wlcore_is_queue_stopped_by_reason(struct wl1271 *wl,
+ struct wl12xx_vif *wlvif, u8 queue,
+ enum wlcore_queue_stop_reason reason)
{
- return test_bit(reason, &wl->queue_stop_reasons[queue]);
+ int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
+ return test_bit(reason, &wl->queue_stop_reasons[hwq]);
}

-bool wlcore_is_queue_stopped(struct wl1271 *wl, u8 queue)
+bool wlcore_is_queue_stopped(struct wl1271 *wl, struct wl12xx_vif *wlvif,
+ u8 queue)
{
- return !!wl->queue_stop_reasons[queue];
+ int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
+ return !!wl->queue_stop_reasons[hwq];
}
diff --git a/drivers/net/wireless/ti/wlcore/tx.h b/drivers/net/wireless/ti/wlcore/tx.h
index 349520d..3d854e5 100644
--- a/drivers/net/wireless/ti/wlcore/tx.h
+++ b/drivers/net/wireless/ti/wlcore/tx.h
@@ -252,20 +252,21 @@ void wl12xx_rearm_rx_streaming(struct wl1271 *wl, unsigned long *active_hlids);
unsigned int wlcore_calc_packet_alignment(struct wl1271 *wl,
unsigned int packet_length);
void wl1271_free_tx_id(struct wl1271 *wl, int id);
-void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
- enum wlcore_queue_stop_reason reason);
-void wlcore_stop_queue(struct wl1271 *wl, u8 queue,
+void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
+ u8 queue, enum wlcore_queue_stop_reason reason);
+void wlcore_stop_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
enum wlcore_queue_stop_reason reason);
-void wlcore_wake_queue(struct wl1271 *wl, u8 queue,
+void wlcore_wake_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
enum wlcore_queue_stop_reason reason);
void wlcore_stop_queues(struct wl1271 *wl,
enum wlcore_queue_stop_reason reason);
void wlcore_wake_queues(struct wl1271 *wl,
enum wlcore_queue_stop_reason reason);
-void wlcore_reset_stopped_queues(struct wl1271 *wl);
-bool wlcore_is_queue_stopped_by_reason(struct wl1271 *wl, u8 queue,
+bool wlcore_is_queue_stopped_by_reason(struct wl1271 *wl,
+ struct wl12xx_vif *wlvif, u8 queue,
enum wlcore_queue_stop_reason reason);
-bool wlcore_is_queue_stopped(struct wl1271 *wl, u8 queue);
+bool wlcore_is_queue_stopped(struct wl1271 *wl, struct wl12xx_vif *wlvif,
+ u8 queue);

/* from main.c */
void wl1271_free_sta(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 hlid);
diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h
index f838eb2..921a978 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore.h
@@ -256,7 +256,8 @@ struct wl1271 {

/* Frames scheduled for transmission, not handled yet */
int tx_queue_count[NUM_TX_QUEUES];
- unsigned long queue_stop_reasons[NUM_TX_QUEUES];
+ unsigned long queue_stop_reasons[
+ NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES];

/* Frames received, not handled yet by mac80211 */
struct sk_buff_head deferred_rx_queue;
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 69e18e5..6fe7756 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -431,6 +431,15 @@ struct wl12xx_vif {
int inconn_count;

/*
+ * This vif's queues are mapped to mac80211 HW queues as:
+ * VO - hw_queue_base
+ * VI - hw_queue_base + 1
+ * BE - hw_queue_base + 2
+ * BK - hw_queue_base + 3
+ */
+ int hw_queue_base;
+
+ /*
* This struct must be last!
* data that has to be saved acrossed reconfigs (e.g. recovery)
* should be declared in this struct.
--
1.7.9.5

2012-12-10 21:51:00

by Luciano Coelho

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Mon, 2012-12-10 at 19:59 +0200, Arik Nemtsov wrote:
> On Mon, Dec 10, 2012 at 5:33 PM, Luciano Coelho <[email protected]> wrote:
> > +static void wlcore_hw_queue_iter(void *data, u8 *mac,
> > + struct ieee80211_vif *vif)
> > +{
> > + struct wlcore_hw_queue_iter_data *iter_data = data;
> > +
> > + if (WARN_ON(vif->hw_queue[0] == IEEE80211_INVAL_HW_QUEUE))
> > + return;
> > +
> > + if (iter_data->cur_running || vif == iter_data->vif) {
> > + iter_data->cur_running = true;
> > + return;
> > + }
> > +
> > + __set_bit(vif->hw_queue[0] / NUM_TX_QUEUES, iter_data->hw_queue_map);
> > +}
> >
> > Can't you just use an int to pass the q_base that you use below? Am I
> > missing something or are you just using the bitmap to pass the vif
> > number?
>
> That's not the case. I'm using all the bits marked. There's a special
> case where the vif already exists in mac80211, we don't allocate a new
> HW-queue-base and just use the existing one in vif->hw_queue_base.

Okay, I see.

> >> +static int wlcore_allocate_hw_queue_base(struct wl1271 *wl,
> >> + struct wl12xx_vif *wlvif)
> >> +{
> >> + struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif);
> >> + struct wlcore_hw_queue_iter_data iter_data = {};
> >> + int i, q_base;
> >> +
> >> + iter_data.vif = vif;
> >> +
> >> + /* mark all bits taken by active interfaces */
> >> + ieee80211_iterate_active_interfaces_atomic(wl->hw,
> >> + IEEE80211_IFACE_ITER_RESUME_ALL,
> >> + wlcore_hw_queue_iter, &iter_data);
> >
> > The comment here is misleading. You don't really mark all bits, you
> > mark bits one by one. This iteration will only set a single bit.
>
> The iteration will set several bits, one for each active interface.
> Note that cur_running = true in the iterator is a special case in
> suspend/resume, usually in suspend/resume. We don't have cur_running =
> true when a new interface is added.

Now I get it. This function is called with supposedly a new vif (which
it is in most cases). Only in this special case the list of "active"
interfaces in mac80211 will contain the one that is being added...

> >> /+ /* the current vif is already running in mac80211 (resume/recovery) */
> >> + if (iter_data.cur_running) {
> >> + wlvif->hw_queue_base = vif->hw_queue[0];
> >> + wl1271_debug(DEBUG_MAC80211,
> >> + "using pre-allocated hw queue base %d",
> >> + wlvif->hw_queue_base);
> >> +
> >> + /* interface type might have changed type */
> >> + goto adjust_cab_queue;
> >> + }
> >
> > This can probably be avoided with the new IEEE80211_IFACE_ITER_NORMAL,
> > can't it?
>
> we can't use normal since we need to assign the exact same HW queue
> base to existing interfaces (in case of resume from suspend).
> Otherwise we'll have a problem, as mac80211 doesn't flush internal
> packets during suspend. So if we assign a new HW queues
> to an existing vif we risk stopping/starting the wrong vifs using the
> ieee80211_queue_xxx APIs.

Isn't the only difference of using _NORMAL that mac80211 won't call the
iterator function when the driver already knows about the interface?

Or is this happening too late here, because when mac80211 calls the
add_interface op it already considers that the driver knows about it?

> >> + q_base = find_first_zero_bit(iter_data.hw_queue_map,
> >> + WLCORE_NUM_MAC_ADDRESSES);

Okay, this is what confused me. For some reason I interpreted this call
as find_first_bit() instead.

> >> + if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
> >> + return -EBUSY;
> >> +
> >> + wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
> >> + wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
> >> + wlvif->hw_queue_base);
> >> +
> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> + wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
> >> + /* register hw queues in mac80211 */
> >> + vif->hw_queue[i] = wlvif->hw_queue_base + i;
> >> + }
> >> +
> >> +adjust_cab_queue:
> >> + /* the last places are reserved for cab queues per interface */
> >> + if (wlvif->bss_type == BSS_TYPE_AP_BSS)
> >> + vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
> >> + wlvif->hw_queue_base / NUM_TX_QUEUES;
> >> + else
> >> + vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
> >
> > Why not keep the reservation per-vif in the same order (ie. the cab
> > queue comes together with the other queues for that vif)? It doesn't
> > matter that you don't use the cab_queue for the non AP vifs, since
> > you're allocating them anyway.
> >
> > Keeping them together would make this code clearer. Just make a macro
> > NUM_TX_QUEUES_PER_VIF that includes the space for cab_queue.
>
> I did it on purpose - that way the functions wlcore_wake_queues and
> friends can loop on the "real" vifs and stop/start them.

I don't see the difference. They can still do it all the same.

> >> diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
> >> index d799fc9..dbc1721 100644
> >> --- a/drivers/net/wireless/ti/wlcore/tx.c
> >> +++ b/drivers/net/wireless/ti/wlcore/tx.c
> >
> > [...]
> >
> >> @@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
> >> }
> >> EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);
> >>
> >> -void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
> >> - enum wlcore_queue_stop_reason reason)
> >> +void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
> >> + u8 queue, enum wlcore_queue_stop_reason reason)
> >> {
> >> - bool stopped = !!wl->queue_stop_reasons[queue];
> >> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
> >
> > It would look nicer if we moved the "wlvif->hw_queue_base + " to the
> > wl1271_tx_get_mac80211_queue() function. After all, the function call
> > without adding the hw_queue_base doesn't mean anything anymore.
>
> well a rename is necessary as well then, since this is not a simple
> translation between mac8021 and wlcore jargon for queues..

I don't think we need to rename. I think wl1271_tx_get_mac80211_queue()
suits very well to include the hw_queue_base. It would be a more
complete translation, actually.

> >> @@ -1235,49 +1239,62 @@ out:
> >> void wlcore_stop_queues(struct wl1271 *wl,
> >> enum wlcore_queue_stop_reason reason)
> >> {
> >> - int i;
> >> + int i, q_base;
> >> + unsigned long flags;
> >>
> >> - for (i = 0; i < NUM_TX_QUEUES; i++)
> >> - wlcore_stop_queue(wl, i, reason);
> >> -}
> >> -EXPORT_SYMBOL_GPL(wlcore_stop_queues);
> >> + spin_lock_irqsave(&wl->wl_lock, flags);
> >>
> >> -void wlcore_wake_queues(struct wl1271 *wl,
> >> - enum wlcore_queue_stop_reason reason)
> >> -{
> >> - int i;
> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> + int h = q_base * NUM_TX_QUEUES + i;
> >> + WARN_ON(test_and_set_bit(reason,
> >> + &wl->queue_stop_reasons[h]));
> >> + }
> >
> > This looks a bit hacky. You'll "stop queues" even for vifs that were
> > not added, won't you? Also, you can do with a single for-loop here ;)
>
> there's no harm in setting the bit for queues that were not added, as
> we zero out all stop reasons when a vif is added. we need to zero-out
> the reasons anyway in case someone removes a stopped vif..

Yeah, I know there's no harm, but it still looks a bit ugly. It's
probably not efficient to iterate over the existing vifs to set the stop
reasons, but that would look nicer.

In any case, if you want a quick and dirty way of doing it without
caring about the actual vifs, it could be like this:

for (i = 0; i < WLCORE_NUM_MAC_ADDRESSES * NUM_TX_QUEUES; i++)
WARN_ON(test_and_set_bit(reason,
&wl->queue_stop_reasons[i]));

Your double-loop would make more sense if the cabs were included in the
same block per vif, as I suggested, though.

> > I'm also not sure about the WARN_ON. If one of the queues is already
> > stopped, should we really warn? I see that, currently, the reasons to
> > stop all queues are not used to stop a single queue, but still. :)
>
> you're not supposed to use the global API if you've already used a
> per-vif stop/start API. Generally there are reasons we stop a vif for,
> and other reasons we stop all vifs for. There should not be an overlap
> between reasons as that doesn't make sense.

Yes, this makes my statement below more significant. :)

> I guess we could convert it to WARN_ON_ONCE though.

Yes, didn't I mention I would convert all WARN_ONs to WARN_ON_ONCEs? Oh,
well, I meant it anyway. ;)

> >
> > [...]
> >
> >> -void wlcore_reset_stopped_queues(struct wl1271 *wl)
> >> +void wlcore_wake_queues(struct wl1271 *wl,
> >> + enum wlcore_queue_stop_reason reason)
> >> {
> >> - int i;
> >> + int i, q_base;
> >> unsigned long flags;
> >>
> >> spin_lock_irqsave(&wl->wl_lock, flags);
> >>
> >> - for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> - if (!wl->queue_stop_reasons[i])
> >> - continue;
> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> + int h = q_base * NUM_TX_QUEUES + i;
> >> + WARN_ON(!test_and_clear_bit(reason,
> >> + &wl->queue_stop_reasons[h]));
> >> + }
> >
> > Same thing here. Actually, wouldn't it make more sense to have a
> > separate bitmask for the "full-stops"? In that way, at least it would be
> > very clear that the full-stop reasons (eg. flush, restart...) cannot be
> > used with single queues.
>
> I think the WARN_ON separates this quite well already, I would prefer
> to avoid the extra complexity. Again this can be converted to
> WARN_ON_ONCE, since WARN_ON is frowned upon (and rightly so).

A WARN_ON separates it only at runtime. We could miss it if the exact
combination of factors don't happen during testing. If we would
separate them, we could also have a global all_queues_stop_reason
instead of going through all the queues and marking them. ;)

--
Luca.

2012-12-10 15:33:46

by Luciano Coelho

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Fri, 2012-11-30 at 00:48 +0200, Arik Nemtsov wrote:
> Start using the new hw_queue mechanism in mac80211 and give each AC in
> each vif its own hw_queue number. This allows us to stop an AC in a vif
> independently from other vifs.
>
> Change the Tx watermark handling functions to count packets per AC in
> vif. From now on fast links should not be able to hurt the throughput
> of slow links on the same AC but on different vifs.
>
> Change internal queue mgmt functions to operate per vif, to support the
> new Tx watermark granularity. Make the global versions of the queue
> stop/start functions to use the global mac80211 API for queue mgmt. This
> helps in situations where the driver currently doesn't know all the vifs
> that reside in mac80211. Recovery is a good example for such a case.
>
> Signed-off-by: Arik Nemtsov <[email protected]>
> ---
> v2: move to new vif iterator API

Thanks for respinning! :)

> diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
> index 15631fa..c9fb441 100644
> --- a/drivers/net/wireless/ti/wlcore/main.c
> +++ b/drivers/net/wireless/ti/wlcore/main.c

[...]

> @@ -2313,6 +2313,81 @@ static void wl12xx_force_active_psm(struct wl1271 *wl)
> }
> }
>
> +struct wlcore_hw_queue_iter_data {
> + unsigned long hw_queue_map[BITS_TO_LONGS(WLCORE_NUM_MAC_ADDRESSES)];
> + /* current vif */
> + struct ieee80211_vif *vif;
> + /* is the current vif among those iterated */
> + bool cur_running;
> +};
> +
> +static void wlcore_hw_queue_iter(void *data, u8 *mac,
> + struct ieee80211_vif *vif)
> +{
> + struct wlcore_hw_queue_iter_data *iter_data = data;
> +
> + if (WARN_ON(vif->hw_queue[0] == IEEE80211_INVAL_HW_QUEUE))
> + return;
> +
> + if (iter_data->cur_running || vif == iter_data->vif) {
> + iter_data->cur_running = true;
> + return;
> + }
> +
> + __set_bit(vif->hw_queue[0] / NUM_TX_QUEUES, iter_data->hw_queue_map);
> +}

Can't you just use an int to pass the q_base that you use below? Am I
missing something or are you just using the bitmap to pass the vif
number?

> +static int wlcore_allocate_hw_queue_base(struct wl1271 *wl,
> + struct wl12xx_vif *wlvif)
> +{
> + struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif);
> + struct wlcore_hw_queue_iter_data iter_data = {};
> + int i, q_base;
> +
> + iter_data.vif = vif;
> +
> + /* mark all bits taken by active interfaces */
> + ieee80211_iterate_active_interfaces_atomic(wl->hw,
> + IEEE80211_IFACE_ITER_RESUME_ALL,
> + wlcore_hw_queue_iter, &iter_data);

The comment here is misleading. You don't really mark all bits, you
mark bits one by one. This iteration will only set a single bit.

> /+ /* the current vif is already running in mac80211 (resume/recovery) */
> + if (iter_data.cur_running) {
> + wlvif->hw_queue_base = vif->hw_queue[0];
> + wl1271_debug(DEBUG_MAC80211,
> + "using pre-allocated hw queue base %d",
> + wlvif->hw_queue_base);
> +
> + /* interface type might have changed type */
> + goto adjust_cab_queue;
> + }

This can probably be avoided with the new IEEE80211_IFACE_ITER_NORMAL,
can't it?

> + q_base = find_first_zero_bit(iter_data.hw_queue_map,
> + WLCORE_NUM_MAC_ADDRESSES);
> + if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
> + return -EBUSY;
> +
> + wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
> + wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
> + wlvif->hw_queue_base);
> +
> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> + wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
> + /* register hw queues in mac80211 */
> + vif->hw_queue[i] = wlvif->hw_queue_base + i;
> + }
> +
> +adjust_cab_queue:
> + /* the last places are reserved for cab queues per interface */
> + if (wlvif->bss_type == BSS_TYPE_AP_BSS)
> + vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
> + wlvif->hw_queue_base / NUM_TX_QUEUES;
> + else
> + vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;

Why not keep the reservation per-vif in the same order (ie. the cab
queue comes together with the other queues for that vif)? It doesn't
matter that you don't use the cab_queue for the non AP vifs, since
you're allocating them anyway.

Keeping them together would make this code clearer. Just make a macro
NUM_TX_QUEUES_PER_VIF that includes the space for cab_queue.

> diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
> index d799fc9..dbc1721 100644
> --- a/drivers/net/wireless/ti/wlcore/tx.c
> +++ b/drivers/net/wireless/ti/wlcore/tx.c

[...]

> @@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
> }
> EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);
>
> -void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
> - enum wlcore_queue_stop_reason reason)
> +void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
> + u8 queue, enum wlcore_queue_stop_reason reason)
> {
> - bool stopped = !!wl->queue_stop_reasons[queue];
> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);

It would look nicer if we moved the "wlvif->hw_queue_base + " to the
wl1271_tx_get_mac80211_queue() function. After all, the function call
without adding the hw_queue_base doesn't mean anything anymore.

I'll change this when applying.

> -void wlcore_wake_queue(struct wl1271 *wl, u8 queue,
> +void wlcore_wake_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
> enum wlcore_queue_stop_reason reason)
> {
> unsigned long flags;
> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
>
> spin_lock_irqsave(&wl->wl_lock, flags);
> -

Stray line removal, I'll insert it back.

[...]

> @@ -1235,49 +1239,62 @@ out:
> void wlcore_stop_queues(struct wl1271 *wl,
> enum wlcore_queue_stop_reason reason)
> {
> - int i;
> + int i, q_base;
> + unsigned long flags;
>
> - for (i = 0; i < NUM_TX_QUEUES; i++)
> - wlcore_stop_queue(wl, i, reason);
> -}
> -EXPORT_SYMBOL_GPL(wlcore_stop_queues);
> + spin_lock_irqsave(&wl->wl_lock, flags);
>
> -void wlcore_wake_queues(struct wl1271 *wl,
> - enum wlcore_queue_stop_reason reason)
> -{
> - int i;
> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> + int h = q_base * NUM_TX_QUEUES + i;
> + WARN_ON(test_and_set_bit(reason,
> + &wl->queue_stop_reasons[h]));
> + }

This looks a bit hacky. You'll "stop queues" even for vifs that were
not added, won't you? Also, you can do with a single for-loop here ;)

I'm also not sure about the WARN_ON. If one of the queues is already
stopped, should we really warn? I see that, currently, the reasons to
stop all queues are not used to stop a single queue, but still. :)

[...]

> -void wlcore_reset_stopped_queues(struct wl1271 *wl)
> +void wlcore_wake_queues(struct wl1271 *wl,
> + enum wlcore_queue_stop_reason reason)
> {
> - int i;
> + int i, q_base;
> unsigned long flags;
>
> spin_lock_irqsave(&wl->wl_lock, flags);
>
> - for (i = 0; i < NUM_TX_QUEUES; i++) {
> - if (!wl->queue_stop_reasons[i])
> - continue;
> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> + int h = q_base * NUM_TX_QUEUES + i;
> + WARN_ON(!test_and_clear_bit(reason,
> + &wl->queue_stop_reasons[h]));
> + }

Same thing here. Actually, wouldn't it make more sense to have a
separate bitmask for the "full-stops"? In that way, at least it would be
very clear that the full-stop reasons (eg. flush, restart...) cannot be
used with single queues.

--
Luca.

2012-12-11 07:53:37

by Luciano Coelho

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Tue, 2012-12-11 at 09:38 +0200, Arik Nemtsov wrote:
> On Tue, Dec 11, 2012 at 1:09 AM, Luciano Coelho <[email protected]> wrote:
> >
> >> So for instance we want to stop all vifs during recovery, and mark all
> >> with stop reasons. That's also the reason I used the mac80211 global
> >> API for stopping/waking the queues.
> >
> > A "global" ieee80211_iterate_active_interfaces_atomic() would do the
> > same thing. ;)
> >
> > But I agree that it's simpler to set all possible vifs as stopped here.
>
> Actually your suggestion acts differently in some corner cases.
> Namely, if we stop all vifs and suddenly mac80211 decides to add a
> vif, then the new vif will not be stopped, and we will clear all stop
> reasons for it.

But the new vif should probably not be stopped in this case, if it was
added after we wanted to stop all vifs. Of course it would probably be
a bug if this happens while we're recovering or suspended.

> Then with my version, we will trigger the the WARN_ON when waking the
> queues. But this corner case is so strange that I would like a WARN_ON
> there to detect it. If we do stumble across it, we can discuss various
> solutions, including changing mac80211..

I agree that a WARN when that happens would be good, because it's most
likely a bug. With my solution, there are other ways to warn,
obviously. Maybe it would be clearer to warn if someone tries to add an
interface but the "full stop" is on-going.

> TLDR: Let's keep it the way it is today :)

Ok, but these discussions are good because it makes us think more about
the whole thing. Especially for me, now that I have a lot of stuff to
catch up with. :)

TDLR: I'll apply the patch with this part as it is.

--
Luca.

2012-12-10 18:00:03

by Arik Nemtsov

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Mon, Dec 10, 2012 at 5:33 PM, Luciano Coelho <[email protected]> wrote:
>> + __set_bit(vif->hw_queue[0] / NUM_TX_QUEUES, iter_data->hw_queue_map);
>> +}
>
> Can't you just use an int to pass the q_base that you use below? Am I
> missing something or are you just using the bitmap to pass the vif
> number?

That's not the case. I'm using all the bits marked. There's a special
case where the vif already exists in mac80211, we don't allocate a new
HW-queue-base and just use the existing one in vif->hw_queue_base.

>
>
>> +static int wlcore_allocate_hw_queue_base(struct wl1271 *wl,
>> + struct wl12xx_vif *wlvif)
>> +{
>> + struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif);
>> + struct wlcore_hw_queue_iter_data iter_data = {};
>> + int i, q_base;
>> +
>> + iter_data.vif = vif;
>> +
>> + /* mark all bits taken by active interfaces */
>> + ieee80211_iterate_active_interfaces_atomic(wl->hw,
>> + IEEE80211_IFACE_ITER_RESUME_ALL,
>> + wlcore_hw_queue_iter, &iter_data);
>
> The comment here is misleading. You don't really mark all bits, you
> mark bits one by one. This iteration will only set a single bit.

The iteration will set several bits, one for each active interface.
Note that cur_running = true in the iterator is a special case in
suspend/resume, usually in suspend/resume. We don't have cur_running =
true when a new interface is added.

>
>
>> /+ /* the current vif is already running in mac80211 (resume/recovery) */
>> + if (iter_data.cur_running) {
>> + wlvif->hw_queue_base = vif->hw_queue[0];
>> + wl1271_debug(DEBUG_MAC80211,
>> + "using pre-allocated hw queue base %d",
>> + wlvif->hw_queue_base);
>> +
>> + /* interface type might have changed type */
>> + goto adjust_cab_queue;
>> + }
>
> This can probably be avoided with the new IEEE80211_IFACE_ITER_NORMAL,
> can't it?

we can't use normal since we need to assign the exact same HW queue
base to existing interfaces (in case of resume from suspend).
Otherwise we'll have a problem, as mac80211 doesn't flush internal
packets during suspend. So if we assign a new HW queues
to an existing vif we risk stopping/starting the wrong vifs using the
ieee80211_queue_xxx APIs.

>
>
>> + q_base = find_first_zero_bit(iter_data.hw_queue_map,
>> + WLCORE_NUM_MAC_ADDRESSES);
>> + if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
>> + return -EBUSY;
>> +
>> + wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
>> + wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
>> + wlvif->hw_queue_base);
>> +
>> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> + wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
>> + /* register hw queues in mac80211 */
>> + vif->hw_queue[i] = wlvif->hw_queue_base + i;
>> + }
>> +
>> +adjust_cab_queue:
>> + /* the last places are reserved for cab queues per interface */
>> + if (wlvif->bss_type == BSS_TYPE_AP_BSS)
>> + vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
>> + wlvif->hw_queue_base / NUM_TX_QUEUES;
>> + else
>> + vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
>
> Why not keep the reservation per-vif in the same order (ie. the cab
> queue comes together with the other queues for that vif)? It doesn't
> matter that you don't use the cab_queue for the non AP vifs, since
> you're allocating them anyway.
>
> Keeping them together would make this code clearer. Just make a macro
> NUM_TX_QUEUES_PER_VIF that includes the space for cab_queue.

I did it on purpose - that way the functions wlcore_wake_queues and
friends can loop on the "real" vifs and stop/start them.

>
>
>> diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
>> index d799fc9..dbc1721 100644
>> --- a/drivers/net/wireless/ti/wlcore/tx.c
>> +++ b/drivers/net/wireless/ti/wlcore/tx.c
>
> [...]
>
>> @@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
>> }
>> EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);
>>
>> -void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
>> - enum wlcore_queue_stop_reason reason)
>> +void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
>> + u8 queue, enum wlcore_queue_stop_reason reason)
>> {
>> - bool stopped = !!wl->queue_stop_reasons[queue];
>> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
>
> It would look nicer if we moved the "wlvif->hw_queue_base + " to the
> wl1271_tx_get_mac80211_queue() function. After all, the function call
> without adding the hw_queue_base doesn't mean anything anymore.

well a rename is necessary as well then, since this is not a simple
translation between mac8021 and wlcore jargon for queues..

>
> I'll change this when applying.
>
>
>> -void wlcore_wake_queue(struct wl1271 *wl, u8 queue,
>> +void wlcore_wake_queue(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 queue,
>> enum wlcore_queue_stop_reason reason)
>> {
>> unsigned long flags;
>> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
>>
>> spin_lock_irqsave(&wl->wl_lock, flags);
>> -
>
> Stray line removal, I'll insert it back.

thanks.

>
> [...]
>
>> @@ -1235,49 +1239,62 @@ out:
>> void wlcore_stop_queues(struct wl1271 *wl,
>> enum wlcore_queue_stop_reason reason)
>> {
>> - int i;
>> + int i, q_base;
>> + unsigned long flags;
>>
>> - for (i = 0; i < NUM_TX_QUEUES; i++)
>> - wlcore_stop_queue(wl, i, reason);
>> -}
>> -EXPORT_SYMBOL_GPL(wlcore_stop_queues);
>> + spin_lock_irqsave(&wl->wl_lock, flags);
>>
>> -void wlcore_wake_queues(struct wl1271 *wl,
>> - enum wlcore_queue_stop_reason reason)
>> -{
>> - int i;
>> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
>> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> + int h = q_base * NUM_TX_QUEUES + i;
>> + WARN_ON(test_and_set_bit(reason,
>> + &wl->queue_stop_reasons[h]));
>> + }
>
> This looks a bit hacky. You'll "stop queues" even for vifs that were
> not added, won't you? Also, you can do with a single for-loop here ;)

there's no harm in setting the bit for queues that were not added, as
we zero out all stop reasons when a vif is added. we need to zero-out
the reasons anyway in case someone removes a stopped vif..

>
> I'm also not sure about the WARN_ON. If one of the queues is already
> stopped, should we really warn? I see that, currently, the reasons to
> stop all queues are not used to stop a single queue, but still. :)

you're not supposed to use the global API if you've already used a
per-vif stop/start API. Generally there are reasons we stop a vif for,
and other reasons we stop all vifs for. There should not be an overlap
between reasons as that doesn't make sense.
I guess we could convert it to WARN_ON_ONCE though.

>
> [...]
>
>> -void wlcore_reset_stopped_queues(struct wl1271 *wl)
>> +void wlcore_wake_queues(struct wl1271 *wl,
>> + enum wlcore_queue_stop_reason reason)
>> {
>> - int i;
>> + int i, q_base;
>> unsigned long flags;
>>
>> spin_lock_irqsave(&wl->wl_lock, flags);
>>
>> - for (i = 0; i < NUM_TX_QUEUES; i++) {
>> - if (!wl->queue_stop_reasons[i])
>> - continue;
>> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
>> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> + int h = q_base * NUM_TX_QUEUES + i;
>> + WARN_ON(!test_and_clear_bit(reason,
>> + &wl->queue_stop_reasons[h]));
>> + }
>
> Same thing here. Actually, wouldn't it make more sense to have a
> separate bitmask for the "full-stops"? In that way, at least it would be
> very clear that the full-stop reasons (eg. flush, restart...) cannot be
> used with single queues.

I think the WARN_ON separates this quite well already, I would prefer
to avoid the extra complexity. Again this can be converted to
WARN_ON_ONCE, since WARN_ON is frowned upon (and rightly so).

Arik

2012-12-10 23:10:27

by Luciano Coelho

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Tue, 2012-12-11 at 00:18 +0200, Arik Nemtsov wrote:
> On Mon, Dec 10, 2012 at 11:50 PM, Luciano Coelho <[email protected]> wrote:
> >> we can't use normal since we need to assign the exact same HW queue
> >> base to existing interfaces (in case of resume from suspend).
> >> Otherwise we'll have a problem, as mac80211 doesn't flush internal
> >> packets during suspend. So if we assign a new HW queues
> >> to an existing vif we risk stopping/starting the wrong vifs using the
> >> ieee80211_queue_xxx APIs.
> >
> > Isn't the only difference of using _NORMAL that mac80211 won't call the
> > iterator function when the driver already knows about the interface?
>
> The other way around - the iterator won't be called for interfaces not
> yet added to the driver that already exist in mac80211.

Right, I see it in the code.

> Using the iterator as is, we gain information on which interfaces
> mac80211 knows, regardless of the lower driver. So if the current
> interface already exists, it means this is recovery/resume.

Ok.

> Actually Johannes came up with this little bit of black magic.

Macumba! :)

> >> >> + if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
> >> >> + return -EBUSY;
> >> >> +
> >> >> + wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
> >> >> + wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
> >> >> + wlvif->hw_queue_base);
> >> >> +
> >> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> >> + wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
> >> >> + /* register hw queues in mac80211 */
> >> >> + vif->hw_queue[i] = wlvif->hw_queue_base + i;
> >> >> + }
> >> >> +
> >> >> +adjust_cab_queue:
> >> >> + /* the last places are reserved for cab queues per interface */
> >> >> + if (wlvif->bss_type == BSS_TYPE_AP_BSS)
> >> >> + vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
> >> >> + wlvif->hw_queue_base / NUM_TX_QUEUES;
> >> >> + else
> >> >> + vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
> >> >
> >> > Why not keep the reservation per-vif in the same order (ie. the cab
> >> > queue comes together with the other queues for that vif)? It doesn't
> >> > matter that you don't use the cab_queue for the non AP vifs, since
> >> > you're allocating them anyway.
> >> >
> >> > Keeping them together would make this code clearer. Just make a macro
> >> > NUM_TX_QUEUES_PER_VIF that includes the space for cab_queue.
> >>
> >> I did it on purpose - that way the functions wlcore_wake_queues and
> >> friends can loop on the "real" vifs and stop/start them.
> >
> > I don't see the difference. They can still do it all the same.
>
> It's a matter of convenience. It was easier for me to picture the real
> HW queues as contiguous and throw the fake ones off to the side.
> The off-channel HW queue is also fake (but global) and gets treated the same..

Okay, that's fine. It is easier for me to picture the queues (fake or
not) contiguous per-vif. :) The off-channel queue is special because it
is global. Anyway, no need to argue, both views are fine and directed
at the same point, so let's keep it as it is. ;)

> >> >> diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
> >> >> index d799fc9..dbc1721 100644
> >> >> --- a/drivers/net/wireless/ti/wlcore/tx.c
> >> >> +++ b/drivers/net/wireless/ti/wlcore/tx.c
> >> >
> >> > [...]
> >> >
> >> >> @@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
> >> >> }
> >> >> EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);
> >> >>
> >> >> -void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
> >> >> - enum wlcore_queue_stop_reason reason)
> >> >> +void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
> >> >> + u8 queue, enum wlcore_queue_stop_reason reason)
> >> >> {
> >> >> - bool stopped = !!wl->queue_stop_reasons[queue];
> >> >> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
> >> >
> >> > It would look nicer if we moved the "wlvif->hw_queue_base + " to the
> >> > wl1271_tx_get_mac80211_queue() function. After all, the function call
> >> > without adding the hw_queue_base doesn't mean anything anymore.
> >>
> >> well a rename is necessary as well then, since this is not a simple
> >> translation between mac8021 and wlcore jargon for queues..
> >
> > I don't think we need to rename. I think wl1271_tx_get_mac80211_queue()
> > suits very well to include the hw_queue_base. It would be a more
> > complete translation, actually.
>
> I won't press the issue :)

:)

> >> >> @@ -1235,49 +1239,62 @@ out:
> >> >> void wlcore_stop_queues(struct wl1271 *wl,
> >> >> enum wlcore_queue_stop_reason reason)
> >> >> {
> >> >> - int i;
> >> >> + int i, q_base;
> >> >> + unsigned long flags;
> >> >>
> >> >> - for (i = 0; i < NUM_TX_QUEUES; i++)
> >> >> - wlcore_stop_queue(wl, i, reason);
> >> >> -}
> >> >> -EXPORT_SYMBOL_GPL(wlcore_stop_queues);
> >> >> + spin_lock_irqsave(&wl->wl_lock, flags);
> >> >>
> >> >> -void wlcore_wake_queues(struct wl1271 *wl,
> >> >> - enum wlcore_queue_stop_reason reason)
> >> >> -{
> >> >> - int i;
> >> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> >> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> >> + int h = q_base * NUM_TX_QUEUES + i;
> >> >> + WARN_ON(test_and_set_bit(reason,
> >> >> + &wl->queue_stop_reasons[h]));
> >> >> + }
> >> >
> >> > This looks a bit hacky. You'll "stop queues" even for vifs that were
> >> > not added, won't you? Also, you can do with a single for-loop here ;)
> >>
> >> there's no harm in setting the bit for queues that were not added, as
> >> we zero out all stop reasons when a vif is added. we need to zero-out
> >> the reasons anyway in case someone removes a stopped vif..
> >
> > Yeah, I know there's no harm, but it still looks a bit ugly. It's
> > probably not efficient to iterate over the existing vifs to set the stop
> > reasons, but that would look nicer.
>
> Note that "existing" is tricky here as sometimes vifs are removed from
> the driver (suspend/recovery) but still exist in mac80211.

It's the same deal as above, isn't it?

> So for instance we want to stop all vifs during recovery, and mark all
> with stop reasons. That's also the reason I used the mac80211 global
> API for stopping/waking the queues.

A "global" ieee80211_iterate_active_interfaces_atomic() would do the
same thing. ;)

But I agree that it's simpler to set all possible vifs as stopped here.

> > In any case, if you want a quick and dirty way of doing it without
> > caring about the actual vifs, it could be like this:
> >
> > for (i = 0; i < WLCORE_NUM_MAC_ADDRESSES * NUM_TX_QUEUES; i++)
> > WARN_ON(test_and_set_bit(reason,
> > &wl->queue_stop_reasons[i]));
> >
> > Your double-loop would make more sense if the cabs were included in the
> > same block per vif, as I suggested, though.
>
> The compiler optimizes away the difference between my code and yours,
> and I thought my version was a bit clearer, but yours is ok as well.

I'm not worried about optimization, the compiler is smarter than me. :)
But I think my version is easier to read, especially if we add a comment
like "/* stop all possible queues */".

> >> > I'm also not sure about the WARN_ON. If one of the queues is already
> >> > stopped, should we really warn? I see that, currently, the reasons to
> >> > stop all queues are not used to stop a single queue, but still. :)
> >>
> >> you're not supposed to use the global API if you've already used a
> >> per-vif stop/start API. Generally there are reasons we stop a vif for,
> >> and other reasons we stop all vifs for. There should not be an overlap
> >> between reasons as that doesn't make sense.
> >
> > Yes, this makes my statement below more significant. :)
> >
> >
> >> I guess we could convert it to WARN_ON_ONCE though.
> >
> > Yes, didn't I mention I would convert all WARN_ONs to WARN_ON_ONCEs? Oh,
> > well, I meant it anyway. ;)
>
> :)
>
> >
> >
> >> >
> >> > [...]
> >> >
> >> >> -void wlcore_reset_stopped_queues(struct wl1271 *wl)
> >> >> +void wlcore_wake_queues(struct wl1271 *wl,
> >> >> + enum wlcore_queue_stop_reason reason)
> >> >> {
> >> >> - int i;
> >> >> + int i, q_base;
> >> >> unsigned long flags;
> >> >>
> >> >> spin_lock_irqsave(&wl->wl_lock, flags);
> >> >>
> >> >> - for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> >> - if (!wl->queue_stop_reasons[i])
> >> >> - continue;
> >> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
> >> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
> >> >> + int h = q_base * NUM_TX_QUEUES + i;
> >> >> + WARN_ON(!test_and_clear_bit(reason,
> >> >> + &wl->queue_stop_reasons[h]));
> >> >> + }
> >> >
> >> > Same thing here. Actually, wouldn't it make more sense to have a
> >> > separate bitmask for the "full-stops"? In that way, at least it would be
> >> > very clear that the full-stop reasons (eg. flush, restart...) cannot be
> >> > used with single queues.
> >>
> >> I think the WARN_ON separates this quite well already, I would prefer
> >> to avoid the extra complexity. Again this can be converted to
> >> WARN_ON_ONCE, since WARN_ON is frowned upon (and rightly so).
> >
> > A WARN_ON separates it only at runtime. We could miss it if the exact
> > combination of factors don't happen during testing. If we would
> > separate them, we could also have a global all_queues_stop_reason
> > instead of going through all the queues and marking them. ;)
>
> I guess it's a good point. But I think this should be in a separate
> patch. This is not directly related to this one and would complicate
> it as is.

Agreed.

--
Luca.

2012-12-11 07:38:42

by Arik Nemtsov

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Tue, Dec 11, 2012 at 1:09 AM, Luciano Coelho <[email protected]> wrote:
>
>> So for instance we want to stop all vifs during recovery, and mark all
>> with stop reasons. That's also the reason I used the mac80211 global
>> API for stopping/waking the queues.
>
> A "global" ieee80211_iterate_active_interfaces_atomic() would do the
> same thing. ;)
>
> But I agree that it's simpler to set all possible vifs as stopped here.

Actually your suggestion acts differently in some corner cases.
Namely, if we stop all vifs and suddenly mac80211 decides to add a
vif, then the new vif will not be stopped, and we will clear all stop
reasons for it.
Then with my version, we will trigger the the WARN_ON when waking the
queues. But this corner case is so strange that I would like a WARN_ON
there to detect it. If we do stumble across it, we can discuss various
solutions, including changing mac80211..

TLDR: Let's keep it the way it is today :)

2012-12-10 22:18:46

by Arik Nemtsov

[permalink] [raw]

Subject: Re: [PATCH v2] wlcore: use separate HW queue for each AC in each vif

On Mon, Dec 10, 2012 at 11:50 PM, Luciano Coelho <[email protected]> wrote:
>> we can't use normal since we need to assign the exact same HW queue
>> base to existing interfaces (in case of resume from suspend).
>> Otherwise we'll have a problem, as mac80211 doesn't flush internal
>> packets during suspend. So if we assign a new HW queues
>> to an existing vif we risk stopping/starting the wrong vifs using the
>> ieee80211_queue_xxx APIs.
>
> Isn't the only difference of using _NORMAL that mac80211 won't call the
> iterator function when the driver already knows about the interface?

The other way around - the iterator won't be called for interfaces not
yet added to the driver that already exist in mac80211.

Using the iterator as is, we gain information on which interfaces
mac80211 knows, regardless of the lower driver. So if the current
interface already exists, it means this is recovery/resume.

Actually Johannes came up with this little bit of black magic.

>
>> >> + if (q_base >= WLCORE_NUM_MAC_ADDRESSES)
>> >> + return -EBUSY;
>> >> +
>> >> + wlvif->hw_queue_base = q_base * NUM_TX_QUEUES;
>> >> + wl1271_debug(DEBUG_MAC80211, "allocating hw queue base: %d",
>> >> + wlvif->hw_queue_base);
>> >> +
>> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> >> + wl->queue_stop_reasons[wlvif->hw_queue_base + i] = 0;
>> >> + /* register hw queues in mac80211 */
>> >> + vif->hw_queue[i] = wlvif->hw_queue_base + i;
>> >> + }
>> >> +
>> >> +adjust_cab_queue:
>> >> + /* the last places are reserved for cab queues per interface */
>> >> + if (wlvif->bss_type == BSS_TYPE_AP_BSS)
>> >> + vif->cab_queue = NUM_TX_QUEUES * WLCORE_NUM_MAC_ADDRESSES +
>> >> + wlvif->hw_queue_base / NUM_TX_QUEUES;
>> >> + else
>> >> + vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
>> >
>> > Why not keep the reservation per-vif in the same order (ie. the cab
>> > queue comes together with the other queues for that vif)? It doesn't
>> > matter that you don't use the cab_queue for the non AP vifs, since
>> > you're allocating them anyway.
>> >
>> > Keeping them together would make this code clearer. Just make a macro
>> > NUM_TX_QUEUES_PER_VIF that includes the space for cab_queue.
>>
>> I did it on purpose - that way the functions wlcore_wake_queues and
>> friends can loop on the "real" vifs and stop/start them.
>
> I don't see the difference. They can still do it all the same.

It's a matter of convenience. It was easier for me to picture the real
HW queues as contiguous and throw the fake ones off to the side.
The off-channel HW queue is also fake (but global) and gets treated the same..

>
>
>> >> diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
>> >> index d799fc9..dbc1721 100644
>> >> --- a/drivers/net/wireless/ti/wlcore/tx.c
>> >> +++ b/drivers/net/wireless/ti/wlcore/tx.c
>> >
>> > [...]
>> >
>> >> @@ -1189,44 +1192,45 @@ u32 wl1271_tx_min_rate_get(struct wl1271 *wl, u32 rate_set)
>> >> }
>> >> EXPORT_SYMBOL_GPL(wl1271_tx_min_rate_get);
>> >>
>> >> -void wlcore_stop_queue_locked(struct wl1271 *wl, u8 queue,
>> >> - enum wlcore_queue_stop_reason reason)
>> >> +void wlcore_stop_queue_locked(struct wl1271 *wl, struct wl12xx_vif *wlvif,
>> >> + u8 queue, enum wlcore_queue_stop_reason reason)
>> >> {
>> >> - bool stopped = !!wl->queue_stop_reasons[queue];
>> >> + int hwq = wlvif->hw_queue_base + wl1271_tx_get_mac80211_queue(queue);
>> >
>> > It would look nicer if we moved the "wlvif->hw_queue_base + " to the
>> > wl1271_tx_get_mac80211_queue() function. After all, the function call
>> > without adding the hw_queue_base doesn't mean anything anymore.
>>
>> well a rename is necessary as well then, since this is not a simple
>> translation between mac8021 and wlcore jargon for queues..
>
> I don't think we need to rename. I think wl1271_tx_get_mac80211_queue()
> suits very well to include the hw_queue_base. It would be a more
> complete translation, actually.

I won't press the issue :)

>
>
>> >> @@ -1235,49 +1239,62 @@ out:
>> >> void wlcore_stop_queues(struct wl1271 *wl,
>> >> enum wlcore_queue_stop_reason reason)
>> >> {
>> >> - int i;
>> >> + int i, q_base;
>> >> + unsigned long flags;
>> >>
>> >> - for (i = 0; i < NUM_TX_QUEUES; i++)
>> >> - wlcore_stop_queue(wl, i, reason);
>> >> -}
>> >> -EXPORT_SYMBOL_GPL(wlcore_stop_queues);
>> >> + spin_lock_irqsave(&wl->wl_lock, flags);
>> >>
>> >> -void wlcore_wake_queues(struct wl1271 *wl,
>> >> - enum wlcore_queue_stop_reason reason)
>> >> -{
>> >> - int i;
>> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
>> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> >> + int h = q_base * NUM_TX_QUEUES + i;
>> >> + WARN_ON(test_and_set_bit(reason,
>> >> + &wl->queue_stop_reasons[h]));
>> >> + }
>> >
>> > This looks a bit hacky. You'll "stop queues" even for vifs that were
>> > not added, won't you? Also, you can do with a single for-loop here ;)
>>
>> there's no harm in setting the bit for queues that were not added, as
>> we zero out all stop reasons when a vif is added. we need to zero-out
>> the reasons anyway in case someone removes a stopped vif..
>
> Yeah, I know there's no harm, but it still looks a bit ugly. It's
> probably not efficient to iterate over the existing vifs to set the stop
> reasons, but that would look nicer.

Note that "existing" is tricky here as sometimes vifs are removed from
the driver (suspend/recovery) but still exist in mac80211.
So for instance we want to stop all vifs during recovery, and mark all
with stop reasons. That's also the reason I used the mac80211 global
API for stopping/waking the queues.

>
> In any case, if you want a quick and dirty way of doing it without
> caring about the actual vifs, it could be like this:
>
> for (i = 0; i < WLCORE_NUM_MAC_ADDRESSES * NUM_TX_QUEUES; i++)
> WARN_ON(test_and_set_bit(reason,
> &wl->queue_stop_reasons[i]));
>
> Your double-loop would make more sense if the cabs were included in the
> same block per vif, as I suggested, though.

The compiler optimizes away the difference between my code and yours,
and I thought my version was a bit clearer, but yours is ok as well.

>
>
>> > I'm also not sure about the WARN_ON. If one of the queues is already
>> > stopped, should we really warn? I see that, currently, the reasons to
>> > stop all queues are not used to stop a single queue, but still. :)
>>
>> you're not supposed to use the global API if you've already used a
>> per-vif stop/start API. Generally there are reasons we stop a vif for,
>> and other reasons we stop all vifs for. There should not be an overlap
>> between reasons as that doesn't make sense.
>
> Yes, this makes my statement below more significant. :)
>
>
>> I guess we could convert it to WARN_ON_ONCE though.
>
> Yes, didn't I mention I would convert all WARN_ONs to WARN_ON_ONCEs? Oh,
> well, I meant it anyway. ;)

:)

>
>
>> >
>> > [...]
>> >
>> >> -void wlcore_reset_stopped_queues(struct wl1271 *wl)
>> >> +void wlcore_wake_queues(struct wl1271 *wl,
>> >> + enum wlcore_queue_stop_reason reason)
>> >> {
>> >> - int i;
>> >> + int i, q_base;
>> >> unsigned long flags;
>> >>
>> >> spin_lock_irqsave(&wl->wl_lock, flags);
>> >>
>> >> - for (i = 0; i < NUM_TX_QUEUES; i++) {
>> >> - if (!wl->queue_stop_reasons[i])
>> >> - continue;
>> >> + for (q_base = 0; q_base < WLCORE_NUM_MAC_ADDRESSES; q_base++)
>> >> + for (i = 0; i < NUM_TX_QUEUES; i++) {
>> >> + int h = q_base * NUM_TX_QUEUES + i;
>> >> + WARN_ON(!test_and_clear_bit(reason,
>> >> + &wl->queue_stop_reasons[h]));
>> >> + }
>> >
>> > Same thing here. Actually, wouldn't it make more sense to have a
>> > separate bitmask for the "full-stops"? In that way, at least it would be
>> > very clear that the full-stop reasons (eg. flush, restart...) cannot be
>> > used with single queues.
>>
>> I think the WARN_ON separates this quite well already, I would prefer
>> to avoid the extra complexity. Again this can be converted to
>> WARN_ON_ONCE, since WARN_ON is frowned upon (and rightly so).
>
> A WARN_ON separates it only at runtime. We could miss it if the exact
> combination of factors don't happen during testing. If we would
> separate them, we could also have a global all_queues_stop_reason
> instead of going through all the queues and marking them. ;)

I guess it's a good point. But I think this should be in a separate
patch. This is not directly related to this one and would complicate
it as is.