2010-10-12 12:49:22

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v3 0/4] wl1271: TX optimizations & fixes

The following patches fix some issues in the TX path, as well as optimize it.

The first patch fixes an issue in wl1271_tx_work. In case the aggregation
buffer is completely filled, the content of the buffer is transferred and no
more packets are sent. Fixed by flushing the buffer and continue aggregating
packets.

The second patch solves a TX starvation issue in wl1271_irq_work. Since TX is
handled by wl1271_tx_work, packets are transmitted after all interrupts are
handled in wl1271_irq_work. Since these include TX completion interrupts the
FW status might be read multiple times needlessly, which could hurt performance.

The third patch is more of a cosmetic change. Instead of traversing the array
of TX descriptors in order to find a free entry, use a bitmap for that
purpose.

The last patch fixes an issue with the TX queue low watermark. The number of
items in the TX queue is checked against the low watermark in
wl1271_tx_complete. However, the fact that a TX completion interrupt was fired
does not necessarily mean that there are any less skbs in the TX queue. Fixed
by moving the handling logic to the TX work, after skbs are actually dequeued.

These patches were tested on a Zoom2 platform (SDIO only). While throughput in
RX scenarios was hardly affected, throughput in TX scenarios was significantly
improved.

Changes from v2:
- Remove the restriction on the maximum number of TX descriptors being 32
Changes from v1:
- Fix a theoretical potential deadlock in irq_work and tx_work. Instead of
cancelling redundant work, avoid scheduling it in the first place.
- Check if the low watermark was reached only if skbs were really dequeued

Ido Yariv (4):
wl1271: TX aggregation optimization
wl1271: Fix TX starvation
wl1271: Allocate TX descriptors more efficiently
wl1271: Fix TX queue low watermark handling

drivers/net/wireless/wl12xx/wl1271.h | 2 +
drivers/net/wireless/wl12xx/wl1271_main.c | 20 ++++-
drivers/net/wireless/wl12xx/wl1271_tx.c | 114 ++++++++++++++++++----------
drivers/net/wireless/wl12xx/wl1271_tx.h | 1 +
4 files changed, 92 insertions(+), 45 deletions(-)



2010-10-12 12:49:47

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v3 3/4] wl1271: Allocate TX descriptors more efficiently

On each TX descriptor allocation, a free entry is found by traversing the TX
descriptors array.

Improve this by holding a bitmap of all TX descriptors, and using efficient
bit operations to search for free entries.

Signed-off-by: Ido Yariv <[email protected]>
---
drivers/net/wireless/wl12xx/wl1271.h | 1 +
drivers/net/wireless/wl12xx/wl1271_main.c | 1 +
drivers/net/wireless/wl12xx/wl1271_tx.c | 38 ++++++++++++++++------------
3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/wl1271.h b/drivers/net/wireless/wl12xx/wl1271.h
index 4a034a3..718e96d 100644
--- a/drivers/net/wireless/wl12xx/wl1271.h
+++ b/drivers/net/wireless/wl12xx/wl1271.h
@@ -398,6 +398,7 @@ struct wl1271 {
struct work_struct tx_work;

/* Pending TX frames */
+ unsigned long tx_frames_map[BITS_TO_LONGS(ACX_TX_DESCRIPTORS)];
struct sk_buff *tx_frames[ACX_TX_DESCRIPTORS];
int tx_frames_cnt;

diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 18aff22..0fd4725 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -2532,6 +2532,7 @@ struct ieee80211_hw *wl1271_alloc_hw(void)
wl->sg_enabled = true;
wl->hw_pg_ver = -1;

+ memset(wl->tx_frames_map, 0, sizeof(wl->tx_frames_map));
for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
wl->tx_frames[i] = NULL;

diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c
index cf32a77..a3a29ea 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.c
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.c
@@ -30,17 +30,26 @@
#include "wl1271_ps.h"
#include "wl1271_tx.h"

-static int wl1271_tx_id(struct wl1271 *wl, struct sk_buff *skb)
+static int wl1271_alloc_tx_id(struct wl1271 *wl, struct sk_buff *skb)
{
- int i;
- for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
- if (wl->tx_frames[i] == NULL) {
- wl->tx_frames[i] = skb;
- wl->tx_frames_cnt++;
- return i;
- }
+ int id;
+
+ id = find_first_zero_bit(wl->tx_frames_map, ACX_TX_DESCRIPTORS);
+ if (id >= ACX_TX_DESCRIPTORS)
+ return -EBUSY;
+
+ set_bit(id, wl->tx_frames_map);
+ wl->tx_frames[id] = skb;
+ wl->tx_frames_cnt++;
+ return id;
+}

- return -EBUSY;
+static void wl1271_free_tx_id(struct wl1271 *wl, int id)
+{
+ if (test_and_clear_bit(id, wl->tx_frames_map)) {
+ wl->tx_frames[id] = NULL;
+ wl->tx_frames_cnt--;
+ }
}

static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
@@ -55,7 +64,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
return -EAGAIN;

/* allocate free identifier for the packet */
- id = wl1271_tx_id(wl, skb);
+ id = wl1271_alloc_tx_id(wl, skb);
if (id < 0)
return id;

@@ -79,8 +88,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
"tx_allocate: size: %d, blocks: %d, id: %d",
total_len, total_blocks, id);
} else {
- wl->tx_frames[id] = NULL;
- wl->tx_frames_cnt--;
+ wl1271_free_tx_id(wl, id);
}

return ret;
@@ -352,8 +360,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl,

/* return the packet to the stack */
ieee80211_tx_status(wl->hw, skb);
- wl->tx_frames[result->id] = NULL;
- wl->tx_frames_cnt--;
+ wl1271_free_tx_id(wl, result->id);
}

/* Called upon reception of a TX complete interrupt */
@@ -422,11 +429,10 @@ void wl1271_tx_reset(struct wl1271 *wl)
for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
if (wl->tx_frames[i] != NULL) {
skb = wl->tx_frames[i];
- wl->tx_frames[i] = NULL;
+ wl1271_free_tx_id(wl, i);
wl1271_debug(DEBUG_TX, "freeing skb 0x%p", skb);
ieee80211_tx_status(wl->hw, skb);
}
- wl->tx_frames_cnt = 0;
}

#define WL1271_TX_FLUSH_TIMEOUT 500000
--
1.7.0.4


2010-10-18 13:51:44

by Luciano Coelho

[permalink] [raw]
Subject: Re: [PATCH v3 0/4] wl1271: TX optimizations & fixes

On Tue, 2010-10-12 at 14:49 +0200, ext Ido Yariv wrote:
> The following patches fix some issues in the TX path, as well as optimize it.
>
> The first patch fixes an issue in wl1271_tx_work. In case the aggregation
> buffer is completely filled, the content of the buffer is transferred and no
> more packets are sent. Fixed by flushing the buffer and continue aggregating
> packets.
>
> The second patch solves a TX starvation issue in wl1271_irq_work. Since TX is
> handled by wl1271_tx_work, packets are transmitted after all interrupts are
> handled in wl1271_irq_work. Since these include TX completion interrupts the
> FW status might be read multiple times needlessly, which could hurt performance.
>
> The third patch is more of a cosmetic change. Instead of traversing the array
> of TX descriptors in order to find a free entry, use a bitmap for that
> purpose.
>
> The last patch fixes an issue with the TX queue low watermark. The number of
> items in the TX queue is checked against the low watermark in
> wl1271_tx_complete. However, the fact that a TX completion interrupt was fired
> does not necessarily mean that there are any less skbs in the TX queue. Fixed
> by moving the handling logic to the TX work, after skbs are actually dequeued.
>
> These patches were tested on a Zoom2 platform (SDIO only). While throughput in
> RX scenarios was hardly affected, throughput in TX scenarios was significantly
> improved.
>
> Changes from v2:
> - Remove the restriction on the maximum number of TX descriptors being 32
> Changes from v1:
> - Fix a theoretical potential deadlock in irq_work and tx_work. Instead of
> cancelling redundant work, avoid scheduling it in the first place.
> - Check if the low watermark was reached only if skbs were really dequeued
>
> Ido Yariv (4):
> wl1271: TX aggregation optimization
> wl1271: Fix TX starvation
> wl1271: Allocate TX descriptors more efficiently
> wl1271: Fix TX queue low watermark handling

Applied the series. Thank you!


--
Cheers,
Luca.


2010-10-12 14:15:52

by Ido Yariv

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] wl1271: Allocate TX descriptors more efficiently

On Tue, Oct 12, 2010 at 02:56:28PM +0200, Johannes Berg wrote:
> This is all under some lock, right? (If that isn't the case, it's racy)
Right.

> Therefore, you need not use atomic bitops which will be fairly
> expensive...
>
> Have you actually measured this? It seems that it con't have a huge
> effect, and the atomic bitops you use now might even negate it.
Like I mentioned in the cover letter, this is more of a cosmetic change
than a real optimization. It doesn't really improve performance, but the
code looks nicer.
Using non atomic bitops cannot hurt, I guess.

Thanks,
Ido.

2010-10-12 12:49:48

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v3 2/4] wl1271: Fix TX starvation

While wl1271_irq_work handles RX directly (by calling wl1271_rx), a different
work is scheduled for transmitting packets. The IRQ work might handle more than
one interrupt during a single call, including multiple TX completion
interrupts. This might starve TX, since no packets are transmitted until all
interrupts are handled.

Fix this by calling the TX work function directly, instead of deferring
it.

Signed-off-by: Ido Yariv <[email protected]>
---
drivers/net/wireless/wl12xx/wl1271.h | 1 +
drivers/net/wireless/wl12xx/wl1271_main.c | 19 +++++++++++++++----
drivers/net/wireless/wl12xx/wl1271_tx.c | 14 ++++++++++----
drivers/net/wireless/wl12xx/wl1271_tx.h | 1 +
4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/wl1271.h b/drivers/net/wireless/wl12xx/wl1271.h
index 8a4cd76..4a034a3 100644
--- a/drivers/net/wireless/wl12xx/wl1271.h
+++ b/drivers/net/wireless/wl12xx/wl1271.h
@@ -351,6 +351,7 @@ struct wl1271 {
#define WL1271_FLAG_IDLE_REQUESTED (11)
#define WL1271_FLAG_PSPOLL_FAILURE (12)
#define WL1271_FLAG_STA_STATE_SENT (13)
+#define WL1271_FLAG_FW_TX_BUSY (14)
unsigned long flags;

struct wl1271_partition_set part;
diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 48a4b99..18aff22 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -481,9 +481,9 @@ static void wl1271_fw_status(struct wl1271 *wl,
total += cnt;
}

- /* if more blocks are available now, schedule some tx work */
- if (total && !skb_queue_empty(&wl->tx_queue))
- ieee80211_queue_work(wl->hw, &wl->tx_work);
+ /* if more blocks are available now, tx work can be scheduled */
+ if (total)
+ clear_bit(WL1271_FLAG_FW_TX_BUSY, &wl->flags);

/* update the host-chipset time offset */
getnstimeofday(&ts);
@@ -537,6 +537,16 @@ static void wl1271_irq_work(struct work_struct *work)
(wl->tx_results_count & 0xff))
wl1271_tx_complete(wl);

+ /* Check if any tx blocks were freed */
+ if (!test_bit(WL1271_FLAG_FW_TX_BUSY, &wl->flags) &&
+ !skb_queue_empty(&wl->tx_queue)) {
+ /*
+ * In order to avoid starvation of the TX path,
+ * call the work function directly.
+ */
+ wl1271_tx_work_locked(wl);
+ }
+
wl1271_rx(wl, wl->fw_status);
}

@@ -867,7 +877,8 @@ static int wl1271_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
* before that, the tx_work will not be initialized!
*/

- ieee80211_queue_work(wl->hw, &wl->tx_work);
+ if (!test_bit(WL1271_FLAG_FW_TX_BUSY, &wl->flags))
+ ieee80211_queue_work(wl->hw, &wl->tx_work);

/*
* The workqueue is slow to process the tx_queue and we need stop
diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c
index b13b373..cf32a77 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.c
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.c
@@ -204,9 +204,8 @@ u32 wl1271_tx_enabled_rates_get(struct wl1271 *wl, u32 rate_set)
return enabled_rates;
}

-void wl1271_tx_work(struct work_struct *work)
+void wl1271_tx_work_locked(struct wl1271 *wl)
{
- struct wl1271 *wl = container_of(work, struct wl1271, tx_work);
struct sk_buff *skb;
bool woken_up = false;
u32 sta_rates = 0;
@@ -223,8 +222,6 @@ void wl1271_tx_work(struct work_struct *work)
spin_unlock_irqrestore(&wl->wl_lock, flags);
}

- mutex_lock(&wl->mutex);
-
if (unlikely(wl->state == WL1271_STATE_OFF))
goto out;

@@ -260,6 +257,8 @@ void wl1271_tx_work(struct work_struct *work)
* Queue back last skb, and stop aggregating.
*/
skb_queue_head(&wl->tx_queue, skb);
+ /* No work left, avoid scheduling redundant tx work */
+ set_bit(WL1271_FLAG_FW_TX_BUSY, &wl->flags);
goto out_ack;
} else if (ret < 0) {
dev_kfree_skb(skb);
@@ -283,7 +282,14 @@ out_ack:
out:
if (woken_up)
wl1271_ps_elp_sleep(wl);
+}

+void wl1271_tx_work(struct work_struct *work)
+{
+ struct wl1271 *wl = container_of(work, struct wl1271, tx_work);
+
+ mutex_lock(&wl->mutex);
+ wl1271_tx_work_locked(wl);
mutex_unlock(&wl->mutex);
}

diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.h b/drivers/net/wireless/wl12xx/wl1271_tx.h
index d12a129..f1c9065 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.h
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.h
@@ -140,6 +140,7 @@ static inline int wl1271_tx_get_queue(int queue)
}

void wl1271_tx_work(struct work_struct *work);
+void wl1271_tx_work_locked(struct wl1271 *wl);
void wl1271_tx_complete(struct wl1271 *wl);
void wl1271_tx_reset(struct wl1271 *wl);
void wl1271_tx_flush(struct wl1271 *wl);
--
1.7.0.4


2010-10-12 12:56:30

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] wl1271: Allocate TX descriptors more efficiently

On Tue, 2010-10-12 at 14:49 +0200, Ido Yariv wrote:

> + id = find_first_zero_bit(wl->tx_frames_map, ACX_TX_DESCRIPTORS);
> + if (id >= ACX_TX_DESCRIPTORS)
> + return -EBUSY;
> +
> + set_bit(id, wl->tx_frames_map);
> + wl->tx_frames[id] = skb;
> + wl->tx_frames_cnt++;

This is all under some lock, right? (If that isn't the case, it's racy)
Therefore, you need not use atomic bitops which will be fairly
expensive...

Have you actually measured this? It seems that it con't have a huge
effect, and the atomic bitops you use now might even negate it.

> + if (test_and_clear_bit(id, wl->tx_frames_map)) {

There too.

johannes


2010-10-12 12:49:50

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v3 4/4] wl1271: Fix TX queue low watermark handling

The number of entries in the TX queue is compared to the low watermark
value each time TX completion interrupts are handled.
However, the fact that a TX completion arrived does not necessarily mean
there are any less skbs in the TX queue.

In addition, a TX completion interrupt does not necessarily mean that there
are any new available TX blocks. Thus, queuing TX work when the low
watermark is reached might not be needed.

Fix this by moving the low watermark handling to the TX work function,
and avoid queuing TX work in this case.

Signed-off-by: Ido Yariv <[email protected]>
---
drivers/net/wireless/wl12xx/wl1271_tx.c | 35 +++++++++++++++++++-----------
1 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c
index a3a29ea..4b8218f 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.c
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.c
@@ -212,6 +212,20 @@ u32 wl1271_tx_enabled_rates_get(struct wl1271 *wl, u32 rate_set)
return enabled_rates;
}

+static void handle_tx_low_watermark(struct wl1271 *wl)
+{
+ unsigned long flags;
+
+ if (test_bit(WL1271_FLAG_TX_QUEUE_STOPPED, &wl->flags) &&
+ skb_queue_len(&wl->tx_queue) <= WL1271_TX_QUEUE_LOW_WATERMARK) {
+ /* firmware buffer has space, restart queues */
+ spin_lock_irqsave(&wl->wl_lock, flags);
+ ieee80211_wake_queues(wl->hw);
+ clear_bit(WL1271_FLAG_TX_QUEUE_STOPPED, &wl->flags);
+ spin_unlock_irqrestore(&wl->wl_lock, flags);
+ }
+}
+
void wl1271_tx_work_locked(struct wl1271 *wl)
{
struct sk_buff *skb;
@@ -225,6 +239,7 @@ void wl1271_tx_work_locked(struct wl1271 *wl)
if (unlikely(test_and_clear_bit(WL1271_FLAG_STA_RATES_CHANGED,
&wl->flags))) {
unsigned long flags;
+
spin_lock_irqsave(&wl->wl_lock, flags);
sta_rates = wl->sta_rate_set;
spin_unlock_irqrestore(&wl->wl_lock, flags);
@@ -285,6 +300,7 @@ out_ack:
if (sent_packets) {
/* interrupt the firmware with the new packets */
wl1271_write32(wl, WL1271_HOST_WR_ACCESS, wl->tx_packets_count);
+ handle_tx_low_watermark(wl);
}

out:
@@ -399,19 +415,6 @@ void wl1271_tx_complete(struct wl1271 *wl)

wl->tx_results_count++;
}
-
- if (test_bit(WL1271_FLAG_TX_QUEUE_STOPPED, &wl->flags) &&
- skb_queue_len(&wl->tx_queue) <= WL1271_TX_QUEUE_LOW_WATERMARK) {
- unsigned long flags;
-
- /* firmware buffer has space, restart queues */
- wl1271_debug(DEBUG_TX, "tx_complete: waking queues");
- spin_lock_irqsave(&wl->wl_lock, flags);
- ieee80211_wake_queues(wl->hw);
- clear_bit(WL1271_FLAG_TX_QUEUE_STOPPED, &wl->flags);
- spin_unlock_irqrestore(&wl->wl_lock, flags);
- ieee80211_queue_work(wl->hw, &wl->tx_work);
- }
}

/* caller must hold wl->mutex */
@@ -426,6 +429,12 @@ void wl1271_tx_reset(struct wl1271 *wl)
ieee80211_tx_status(wl->hw, skb);
}

+ /*
+ * Make sure the driver is at a consistent state, in case this
+ * function is called from a context other than interface removal.
+ */
+ handle_tx_low_watermark(wl);
+
for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
if (wl->tx_frames[i] != NULL) {
skb = wl->tx_frames[i];
--
1.7.0.4


2010-10-12 14:20:13

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v4 3/4] wl1271: Allocate TX descriptors more efficiently

On each TX descriptor allocation, a free entry is found by traversing the TX
descriptors array.

Improve this by holding a bitmap of all TX descriptors, and using efficient
bit operations to search for free entries.

Signed-off-by: Ido Yariv <[email protected]>
---
Changes from v3:
- Use non-atomic bitops when allocating/deallocating TX descriptors

drivers/net/wireless/wl12xx/wl1271.h | 1 +
drivers/net/wireless/wl12xx/wl1271_main.c | 1 +
drivers/net/wireless/wl12xx/wl1271_tx.c | 38 ++++++++++++++++------------
3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/wl1271.h b/drivers/net/wireless/wl12xx/wl1271.h
index 4a034a3..718e96d 100644
--- a/drivers/net/wireless/wl12xx/wl1271.h
+++ b/drivers/net/wireless/wl12xx/wl1271.h
@@ -398,6 +398,7 @@ struct wl1271 {
struct work_struct tx_work;

/* Pending TX frames */
+ unsigned long tx_frames_map[BITS_TO_LONGS(ACX_TX_DESCRIPTORS)];
struct sk_buff *tx_frames[ACX_TX_DESCRIPTORS];
int tx_frames_cnt;

diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 18aff22..0fd4725 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -2532,6 +2532,7 @@ struct ieee80211_hw *wl1271_alloc_hw(void)
wl->sg_enabled = true;
wl->hw_pg_ver = -1;

+ memset(wl->tx_frames_map, 0, sizeof(wl->tx_frames_map));
for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
wl->tx_frames[i] = NULL;

diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c
index cf32a77..85878e5 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.c
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.c
@@ -30,17 +30,26 @@
#include "wl1271_ps.h"
#include "wl1271_tx.h"

-static int wl1271_tx_id(struct wl1271 *wl, struct sk_buff *skb)
+static int wl1271_alloc_tx_id(struct wl1271 *wl, struct sk_buff *skb)
{
- int i;
- for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
- if (wl->tx_frames[i] == NULL) {
- wl->tx_frames[i] = skb;
- wl->tx_frames_cnt++;
- return i;
- }
+ int id;
+
+ id = find_first_zero_bit(wl->tx_frames_map, ACX_TX_DESCRIPTORS);
+ if (id >= ACX_TX_DESCRIPTORS)
+ return -EBUSY;
+
+ __set_bit(id, wl->tx_frames_map);
+ wl->tx_frames[id] = skb;
+ wl->tx_frames_cnt++;
+ return id;
+}

- return -EBUSY;
+static void wl1271_free_tx_id(struct wl1271 *wl, int id)
+{
+ if (__test_and_clear_bit(id, wl->tx_frames_map)) {
+ wl->tx_frames[id] = NULL;
+ wl->tx_frames_cnt--;
+ }
}

static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
@@ -55,7 +64,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
return -EAGAIN;

/* allocate free identifier for the packet */
- id = wl1271_tx_id(wl, skb);
+ id = wl1271_alloc_tx_id(wl, skb);
if (id < 0)
return id;

@@ -79,8 +88,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
"tx_allocate: size: %d, blocks: %d, id: %d",
total_len, total_blocks, id);
} else {
- wl->tx_frames[id] = NULL;
- wl->tx_frames_cnt--;
+ wl1271_free_tx_id(wl, id);
}

return ret;
@@ -352,8 +360,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl,

/* return the packet to the stack */
ieee80211_tx_status(wl->hw, skb);
- wl->tx_frames[result->id] = NULL;
- wl->tx_frames_cnt--;
+ wl1271_free_tx_id(wl, result->id);
}

/* Called upon reception of a TX complete interrupt */
@@ -422,11 +429,10 @@ void wl1271_tx_reset(struct wl1271 *wl)
for (i = 0; i < ACX_TX_DESCRIPTORS; i++)
if (wl->tx_frames[i] != NULL) {
skb = wl->tx_frames[i];
- wl->tx_frames[i] = NULL;
+ wl1271_free_tx_id(wl, i);
wl1271_debug(DEBUG_TX, "freeing skb 0x%p", skb);
ieee80211_tx_status(wl->hw, skb);
}
- wl->tx_frames_cnt = 0;
}

#define WL1271_TX_FLUSH_TIMEOUT 500000
--
1.7.0.4


2010-10-12 12:49:23

by Ido Yariv

[permalink] [raw]
Subject: [PATCH v3 1/4] wl1271: TX aggregation optimization

In case the aggregation buffer is too small to hold all available packets,
the buffer is transferred to the FW and no more packets are aggregated.
Although there may be enough available TX blocks, no additional packets will
be handled by the current TX work.

Fix this by flushing the aggregation buffer when it's full, and continue
transferring packets as long as there are enough available TX blocks.

Signed-off-by: Ido Yariv <[email protected]>
---
drivers/net/wireless/wl12xx/wl1271_tx.c | 27 +++++++++++++++++++--------
1 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c
index e3dc13c..b13b373 100644
--- a/drivers/net/wireless/wl12xx/wl1271_tx.c
+++ b/drivers/net/wireless/wl12xx/wl1271_tx.c
@@ -52,7 +52,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct sk_buff *skb, u32 extra,
int id, ret = -EBUSY;

if (buf_offset + total_len > WL1271_AGGR_BUFFER_SIZE)
- return -EBUSY;
+ return -EAGAIN;

/* allocate free identifier for the packet */
id = wl1271_tx_id(wl, skb);
@@ -210,7 +210,8 @@ void wl1271_tx_work(struct work_struct *work)
struct sk_buff *skb;
bool woken_up = false;
u32 sta_rates = 0;
- u32 buf_offset;
+ u32 buf_offset = 0;
+ bool sent_packets = false;
int ret;

/* check if the rates supported by the AP have changed */
@@ -233,9 +234,6 @@ void wl1271_tx_work(struct work_struct *work)
wl1271_acx_rate_policies(wl);
}

- /* Prepare the transfer buffer, by aggregating all
- * available packets */
- buf_offset = 0;
while ((skb = skb_dequeue(&wl->tx_queue))) {
if (!woken_up) {
ret = wl1271_ps_elp_wakeup(wl, false);
@@ -245,10 +243,20 @@ void wl1271_tx_work(struct work_struct *work)
}

ret = wl1271_prepare_tx_frame(wl, skb, buf_offset);
- if (ret == -EBUSY) {
+ if (ret == -EAGAIN) {
/*
- * Either the firmware buffer is full, or the
- * aggregation buffer is.
+ * Aggregation buffer is full.
+ * Flush buffer and try again.
+ */
+ skb_queue_head(&wl->tx_queue, skb);
+ wl1271_write(wl, WL1271_SLV_MEM_DATA, wl->aggr_buf,
+ buf_offset, true);
+ sent_packets = true;
+ buf_offset = 0;
+ continue;
+ } else if (ret == -EBUSY) {
+ /*
+ * Firmware buffer is full.
* Queue back last skb, and stop aggregating.
*/
skb_queue_head(&wl->tx_queue, skb);
@@ -265,6 +273,9 @@ out_ack:
if (buf_offset) {
wl1271_write(wl, WL1271_SLV_MEM_DATA, wl->aggr_buf,
buf_offset, true);
+ sent_packets = true;
+ }
+ if (sent_packets) {
/* interrupt the firmware with the new packets */
wl1271_write32(wl, WL1271_HOST_WR_ACCESS, wl->tx_packets_count);
}
--
1.7.0.4