Hi,
This patchset reduces CPU load and
improves TX performance on AP135 by an additional
30mbps (give or take; 560mbps -> 590mbps), at
least on my testbed.
Michal Kazior (5):
ath10k: use num_pending_tx instead of msdu id bitmap
ath10k: avoid needless memset on TX path
ath10k: decouple HTT TX completions
ath10k: cleanup HTT TX functions
ath10k: use msdu headroom to store txfrag
drivers/net/wireless/ath/ath10k/core.h | 12 +-
drivers/net/wireless/ath/ath10k/htc.c | 2 +-
drivers/net/wireless/ath/ath10k/htt_rx.c | 4 +-
drivers/net/wireless/ath/ath10k/htt_tx.c | 249 ++++++++++++------------------
drivers/net/wireless/ath/ath10k/mac.c | 9 +-
drivers/net/wireless/ath/ath10k/txrx.c | 65 +++-----
drivers/net/wireless/ath/ath10k/txrx.h | 5 +-
7 files changed, 131 insertions(+), 215 deletions(-)
--
1.7.9.5
This reduces the number of memory accesses and
hopefully contributes to better performance in the
future.
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/htc.c | 2 +-
drivers/net/wireless/ath/ath10k/htt_tx.c | 4 +++-
drivers/net/wireless/ath/ath10k/mac.c | 4 +++-
drivers/net/wireless/ath/ath10k/txrx.c | 1 -
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index d0d7212..3118d75 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -103,10 +103,10 @@ static void ath10k_htc_prepare_tx_skb(struct ath10k_htc_ep *ep,
struct ath10k_htc_hdr *hdr;
hdr = (struct ath10k_htc_hdr *)skb->data;
- memset(hdr, 0, sizeof(*hdr));
hdr->eid = ep->eid;
hdr->len = __cpu_to_le16(skb->len - sizeof(*hdr));
+ hdr->flags = 0;
spin_lock_bh(&ep->htc->tx_lock);
hdr->seq_no = ep->seq_no++;
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 4548128..c4bbf74 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -384,9 +384,11 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
/* refcount is decremented by HTC and HTT completions until it reaches
* zero and is freed */
skb_cb = ATH10K_SKB_CB(txdesc);
+ skb_cb->htt.is_conf = false;
skb_cb->htt.msdu_id = msdu_id;
skb_cb->htt.refcount = 2;
skb_cb->htt.msdu = msdu;
+ skb_cb->htt.txfrag = NULL;
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
@@ -505,7 +507,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
skb_put(txdesc, desc_len);
cmd = (struct htt_cmd *)txdesc->data;
- memset(cmd, 0, desc_len);
tid = ATH10K_SKB_CB(msdu)->htt.tid;
@@ -555,6 +556,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
/* refcount is decremented by HTC and HTT completions until it reaches
* zero and is freed */
skb_cb = ATH10K_SKB_CB(txdesc);
+ skb_cb->htt.is_conf = false;
skb_cb->htt.msdu_id = msdu_id;
skb_cb->htt.refcount = 2;
skb_cb->htt.txfrag = txfrag;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 5a56833..0d367e4 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1757,7 +1757,9 @@ static void ath10k_tx(struct ieee80211_hw *hw,
ath10k_tx_h_seq_no(skb);
}
- memset(ATH10K_SKB_CB(skb), 0, sizeof(*ATH10K_SKB_CB(skb)));
+ ATH10K_SKB_CB(skb)->is_mapped = false;
+ ATH10K_SKB_CB(skb)->is_aborted = false;
+ ATH10K_SKB_CB(skb)->htt.is_offchan = false;
ATH10K_SKB_CB(skb)->htt.vdev_id = vdev_id;
ATH10K_SKB_CB(skb)->htt.tid = tid;
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 37b8196..f6fed31 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -75,7 +75,6 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc)
ath10k_report_offchan_tx(htt->ar, msdu);
info = IEEE80211_SKB_CB(msdu);
- memset(&info->status, 0, sizeof(info->status));
if (ATH10K_SKB_CB(txdesc)->htt.discard) {
ieee80211_free_txskb(htt->ar->hw, msdu);
--
1.7.9.5
Michal Kazior <[email protected]> writes:
> Hi,
>
> This patchset reduces CPU load and
> improves TX performance on AP135 by an additional
> 30mbps (give or take; 560mbps -> 590mbps), at
> least on my testbed.
>
>
> Michal Kazior (5):
> ath10k: use num_pending_tx instead of msdu id bitmap
> ath10k: avoid needless memset on TX path
> ath10k: decouple HTT TX completions
> ath10k: cleanup HTT TX functions
> ath10k: use msdu headroom to store txfrag
Thanks, all five applied.
--
Kalle Valo
Use a saner goto scheme for failure handling. Also
group operations more sensibly.
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/htt_tx.c | 141 ++++++++++++++----------------
1 file changed, 68 insertions(+), 73 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 06946d2..5e738d8 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -315,30 +315,30 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = ath10k_htt_tx_inc_pending(htt);
if (res)
- return res;
+ goto err;
len += sizeof(cmd->hdr);
len += sizeof(cmd->mgmt_tx);
- txdesc = ath10k_htc_alloc_skb(len);
- if (!txdesc) {
- res = -ENOMEM;
- goto err;
- }
-
spin_lock_bh(&htt->tx_lock);
- msdu_id = ath10k_htt_tx_alloc_msdu_id(htt);
- if (msdu_id < 0) {
+ res = ath10k_htt_tx_alloc_msdu_id(htt);
+ if (res < 0) {
spin_unlock_bh(&htt->tx_lock);
- res = msdu_id;
- goto err;
+ goto err_tx_dec;
}
+ msdu_id = res;
htt->pending_tx[msdu_id] = msdu;
spin_unlock_bh(&htt->tx_lock);
+ txdesc = ath10k_htc_alloc_skb(len);
+ if (!txdesc) {
+ res = -ENOMEM;
+ goto err_free_msdu_id;
+ }
+
res = ath10k_skb_map(dev, msdu);
if (res)
- goto err;
+ goto err_free_txdesc;
skb_put(txdesc, len);
cmd = (struct htt_cmd *)txdesc->data;
@@ -352,22 +352,22 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
- goto err;
+ goto err_unmap_msdu;
return 0;
-err:
+err_unmap_msdu:
ath10k_skb_unmap(dev, msdu);
-
- if (txdesc)
- dev_kfree_skb_any(txdesc);
- if (msdu_id >= 0) {
- spin_lock_bh(&htt->tx_lock);
- htt->pending_tx[msdu_id] = NULL;
- ath10k_htt_tx_free_msdu_id(htt, msdu_id);
- spin_unlock_bh(&htt->tx_lock);
- }
+err_free_txdesc:
+ dev_kfree_skb_any(txdesc);
+err_free_msdu_id:
+ spin_lock_bh(&htt->tx_lock);
+ htt->pending_tx[msdu_id] = NULL;
+ ath10k_htt_tx_free_msdu_id(htt, msdu_id);
+ spin_unlock_bh(&htt->tx_lock);
+err_tx_dec:
ath10k_htt_tx_dec_pending(htt);
+err:
return res;
}
@@ -379,6 +379,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
struct sk_buff *txdesc = NULL;
struct sk_buff *txfrag = NULL;
+ bool use_frags;
u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
u8 tid;
int prefetch_len, desc_len, frag_len;
@@ -390,7 +391,17 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = ath10k_htt_tx_inc_pending(htt);
if (res)
- return res;
+ goto err;
+
+ spin_lock_bh(&htt->tx_lock);
+ res = ath10k_htt_tx_alloc_msdu_id(htt);
+ if (res < 0) {
+ spin_unlock_bh(&htt->tx_lock);
+ goto err_tx_dec;
+ }
+ msdu_id = res;
+ htt->pending_tx[msdu_id] = msdu;
+ spin_unlock_bh(&htt->tx_lock);
prefetch_len = min(htt->prefetch_len, msdu->len);
prefetch_len = roundup(prefetch_len, 4);
@@ -401,46 +412,34 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
txdesc = ath10k_htc_alloc_skb(desc_len);
if (!txdesc) {
res = -ENOMEM;
- goto err;
+ goto err_free_msdu_id;
}
/* Since HTT 3.0 there is no separate mgmt tx command. However in case
* of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
* fragment list host driver specifies directly frame pointer. */
- if (htt->target_version_major < 3 ||
- !ieee80211_is_mgmt(hdr->frame_control)) {
+ use_frags = htt->target_version_major < 3 ||
+ !ieee80211_is_mgmt(hdr->frame_control);
+
+ if (use_frags) {
txfrag = dev_alloc_skb(frag_len);
if (!txfrag) {
res = -ENOMEM;
- goto err;
+ goto err_free_txdesc;
}
}
if (!IS_ALIGNED((unsigned long)txdesc->data, 4)) {
ath10k_warn("htt alignment check failed. dropping packet.\n");
res = -EIO;
- goto err;
+ goto err_free_txfrag;
}
- spin_lock_bh(&htt->tx_lock);
- msdu_id = ath10k_htt_tx_alloc_msdu_id(htt);
- if (msdu_id < 0) {
- spin_unlock_bh(&htt->tx_lock);
- res = msdu_id;
- goto err;
- }
- htt->pending_tx[msdu_id] = msdu;
- spin_unlock_bh(&htt->tx_lock);
-
res = ath10k_skb_map(dev, msdu);
if (res)
- goto err;
+ goto err_free_txfrag;
- /* Since HTT 3.0 there is no separate mgmt tx command. However in case
- * of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
- * fragment list host driver specifies directly frame pointer. */
- if (htt->target_version_major < 3 ||
- !ieee80211_is_mgmt(hdr->frame_control)) {
+ if (use_frags) {
/* tx fragment list must be terminated with zero-entry */
skb_put(txfrag, frag_len);
tx_frags = (struct htt_data_tx_desc_frag *)txfrag->data;
@@ -451,7 +450,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = ath10k_skb_map(dev, txfrag);
if (res)
- goto err;
+ goto err_unmap_msdu;
ath10k_dbg(ATH10K_DBG_HTT, "txfrag 0x%llx\n",
(unsigned long long) ATH10K_SKB_CB(txfrag)->paddr);
@@ -476,15 +475,11 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT;
flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
- /* Since HTT 3.0 there is no separate mgmt tx command. However in case
- * of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
- * fragment list host driver specifies directly frame pointer. */
- if (htt->target_version_major >= 3 &&
- ieee80211_is_mgmt(hdr->frame_control))
- flags0 |= SM(ATH10K_HW_TXRX_MGMT,
+ if (use_frags)
+ flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
else
- flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
+ flags0 |= SM(ATH10K_HW_TXRX_MGMT,
HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
flags1 = 0;
@@ -493,14 +488,10 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD;
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD;
- /* Since HTT 3.0 there is no separate mgmt tx command. However in case
- * of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
- * fragment list host driver specifies directly frame pointer. */
- if (htt->target_version_major >= 3 &&
- ieee80211_is_mgmt(hdr->frame_control))
- frags_paddr = ATH10K_SKB_CB(msdu)->paddr;
- else
+ if (use_frags)
frags_paddr = ATH10K_SKB_CB(txfrag)->paddr;
+ else
+ frags_paddr = ATH10K_SKB_CB(msdu)->paddr;
cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
cmd->data_tx.flags0 = flags0;
@@ -514,23 +505,27 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
- goto err;
+ goto err_restore;
return 0;
-err:
- if (txfrag)
+
+err_restore:
+ if (use_frags)
ath10k_skb_unmap(dev, txfrag);
- if (txdesc)
- dev_kfree_skb_any(txdesc);
- if (txfrag)
+err_unmap_msdu:
+ ath10k_skb_unmap(dev, msdu);
+err_free_txfrag:
+ if (use_frags)
dev_kfree_skb_any(txfrag);
- if (msdu_id >= 0) {
- spin_lock_bh(&htt->tx_lock);
- htt->pending_tx[msdu_id] = NULL;
- ath10k_htt_tx_free_msdu_id(htt, msdu_id);
- spin_unlock_bh(&htt->tx_lock);
- }
+err_free_txdesc:
+ dev_kfree_skb_any(txdesc);
+err_free_msdu_id:
+ spin_lock_bh(&htt->tx_lock);
+ htt->pending_tx[msdu_id] = NULL;
+ ath10k_htt_tx_free_msdu_id(htt, msdu_id);
+ spin_unlock_bh(&htt->tx_lock);
+err_tx_dec:
ath10k_htt_tx_dec_pending(htt);
- ath10k_skb_unmap(dev, msdu);
+err:
return res;
}
--
1.7.9.5
Until now, all MSDU transfer related structures
were freed only when all resources were unreferenced.
Now the HTC transfer is freed independently, and so
is the HTT transfer.
This yields a much simpler ath10k_skb_cb and
should possibly enable parallel pipe processing
(which is currently serialized in the
ath10k_pci_process_ce routine).
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/core.h | 8 ----
drivers/net/wireless/ath/ath10k/htt_rx.c | 4 +-
drivers/net/wireless/ath/ath10k/htt_tx.c | 64 ++++--------------------------
drivers/net/wireless/ath/ath10k/mac.c | 2 -
drivers/net/wireless/ath/ath10k/txrx.c | 52 ++++++++----------------
drivers/net/wireless/ath/ath10k/txrx.h | 5 +--
6 files changed, 27 insertions(+), 108 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index fcf94ee..4563f80 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -52,18 +52,10 @@ struct ath10k_skb_cb {
struct {
u8 vdev_id;
- u16 msdu_id;
u8 tid;
bool is_offchan;
- bool is_conf;
- bool discard;
- bool no_ack;
- u8 refcount;
struct sk_buff *txfrag;
- struct sk_buff *msdu;
} __packed htt;
-
- /* 4 bytes left on 64bit arch */
} __packed;
static inline struct ath10k_skb_cb *ATH10K_SKB_CB(struct sk_buff *skb)
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index a39fbf4..62ea9c8 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1140,7 +1140,7 @@ void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
break;
}
- ath10k_txrx_tx_completed(htt, &tx_done);
+ ath10k_txrx_tx_unref(htt, &tx_done);
break;
}
case HTT_T2H_MSG_TYPE_TX_COMPL_IND: {
@@ -1174,7 +1174,7 @@ void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
for (i = 0; i < resp->data_tx_completion.num_msdus; i++) {
msdu_id = resp->data_tx_completion.msdus[i];
tx_done.msdu_id = __le16_to_cpu(msdu_id);
- ath10k_txrx_tx_completed(htt, &tx_done);
+ ath10k_txrx_tx_unref(htt, &tx_done);
}
break;
}
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index c4bbf74..06946d2 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -117,7 +117,7 @@ int ath10k_htt_tx_attach(struct ath10k_htt *htt)
static void ath10k_htt_tx_cleanup_pending(struct ath10k_htt *htt)
{
- struct sk_buff *txdesc;
+ struct htt_tx_done tx_done = {0};
int msdu_id;
/* No locks needed. Called after communication with the device has
@@ -127,18 +127,13 @@ static void ath10k_htt_tx_cleanup_pending(struct ath10k_htt *htt)
if (!test_bit(msdu_id, htt->used_msdu_ids))
continue;
- txdesc = htt->pending_tx[msdu_id];
- if (!txdesc)
- continue;
-
ath10k_dbg(ATH10K_DBG_HTT, "force cleanup msdu_id %hu\n",
msdu_id);
- if (ATH10K_SKB_CB(txdesc)->htt.refcount > 0)
- ATH10K_SKB_CB(txdesc)->htt.refcount = 1;
+ tx_done.discard = 1;
+ tx_done.msdu_id = msdu_id;
- ATH10K_SKB_CB(txdesc)->htt.discard = true;
- ath10k_txrx_tx_unref(htt, txdesc);
+ ath10k_txrx_tx_unref(htt, &tx_done);
}
}
@@ -152,26 +147,7 @@ void ath10k_htt_tx_detach(struct ath10k_htt *htt)
void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
{
- struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb);
- struct ath10k_htt *htt = &ar->htt;
-
- if (skb_cb->htt.is_conf) {
- dev_kfree_skb_any(skb);
- return;
- }
-
- if (skb_cb->is_aborted) {
- skb_cb->htt.discard = true;
-
- /* if the skbuff is aborted we need to make sure we'll free up
- * the tx resources, we can't simply run tx_unref() 2 times
- * because if htt tx completion came in earlier we'd access
- * unallocated memory */
- if (skb_cb->htt.refcount > 1)
- skb_cb->htt.refcount = 1;
- }
-
- ath10k_txrx_tx_unref(htt, skb);
+ dev_kfree_skb_any(skb);
}
int ath10k_htt_h2t_ver_req_msg(struct ath10k_htt *htt)
@@ -192,8 +168,6 @@ int ath10k_htt_h2t_ver_req_msg(struct ath10k_htt *htt)
cmd = (struct htt_cmd *)skb->data;
cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_VERSION_REQ;
- ATH10K_SKB_CB(skb)->htt.is_conf = true;
-
ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb);
if (ret) {
dev_kfree_skb_any(skb);
@@ -233,8 +207,6 @@ int ath10k_htt_h2t_stats_req(struct ath10k_htt *htt, u8 mask, u64 cookie)
req->cookie_lsb = cpu_to_le32(cookie & 0xffffffff);
req->cookie_msb = cpu_to_le32((cookie & 0xffffffff00000000ULL) >> 32);
- ATH10K_SKB_CB(skb)->htt.is_conf = true;
-
ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb);
if (ret) {
ath10k_warn("failed to send htt type stats request: %d", ret);
@@ -321,8 +293,6 @@ int ath10k_htt_send_rx_ring_cfg_ll(struct ath10k_htt *htt)
#undef desc_offset
- ATH10K_SKB_CB(skb)->htt.is_conf = true;
-
ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb);
if (ret) {
dev_kfree_skb_any(skb);
@@ -335,7 +305,6 @@ int ath10k_htt_send_rx_ring_cfg_ll(struct ath10k_htt *htt)
int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
{
struct device *dev = htt->ar->dev;
- struct ath10k_skb_cb *skb_cb;
struct sk_buff *txdesc = NULL;
struct htt_cmd *cmd;
u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
@@ -364,7 +333,7 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = msdu_id;
goto err;
}
- htt->pending_tx[msdu_id] = txdesc;
+ htt->pending_tx[msdu_id] = msdu;
spin_unlock_bh(&htt->tx_lock);
res = ath10k_skb_map(dev, msdu);
@@ -381,15 +350,6 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
memcpy(cmd->mgmt_tx.hdr, msdu->data,
min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN));
- /* refcount is decremented by HTC and HTT completions until it reaches
- * zero and is freed */
- skb_cb = ATH10K_SKB_CB(txdesc);
- skb_cb->htt.is_conf = false;
- skb_cb->htt.msdu_id = msdu_id;
- skb_cb->htt.refcount = 2;
- skb_cb->htt.msdu = msdu;
- skb_cb->htt.txfrag = NULL;
-
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
goto err;
@@ -417,7 +377,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
struct htt_cmd *cmd;
struct htt_data_tx_desc_frag *tx_frags;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
- struct ath10k_skb_cb *skb_cb;
struct sk_buff *txdesc = NULL;
struct sk_buff *txfrag = NULL;
u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
@@ -470,7 +429,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
res = msdu_id;
goto err;
}
- htt->pending_tx[msdu_id] = txdesc;
+ htt->pending_tx[msdu_id] = msdu;
spin_unlock_bh(&htt->tx_lock);
res = ath10k_skb_map(dev, msdu);
@@ -553,15 +512,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
memcpy(cmd->data_tx.prefetch, msdu->data, prefetch_len);
- /* refcount is decremented by HTC and HTT completions until it reaches
- * zero and is freed */
- skb_cb = ATH10K_SKB_CB(txdesc);
- skb_cb->htt.is_conf = false;
- skb_cb->htt.msdu_id = msdu_id;
- skb_cb->htt.refcount = 2;
- skb_cb->htt.txfrag = txfrag;
- skb_cb->htt.msdu = msdu;
-
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
goto err;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0d367e4..9112e6d6f 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1757,8 +1757,6 @@ static void ath10k_tx(struct ieee80211_hw *hw,
ath10k_tx_h_seq_no(skb);
}
- ATH10K_SKB_CB(skb)->is_mapped = false;
- ATH10K_SKB_CB(skb)->is_aborted = false;
ATH10K_SKB_CB(skb)->htt.is_offchan = false;
ATH10K_SKB_CB(skb)->htt.vdev_id = vdev_id;
ATH10K_SKB_CB(skb)->htt.tid = tid;
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index f6fed31..15395af 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -44,21 +44,25 @@ out:
spin_unlock_bh(&ar->data_lock);
}
-void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc)
+void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
+ const struct htt_tx_done *tx_done)
{
struct device *dev = htt->ar->dev;
struct ieee80211_tx_info *info;
- struct sk_buff *txfrag = ATH10K_SKB_CB(txdesc)->htt.txfrag;
- struct sk_buff *msdu = ATH10K_SKB_CB(txdesc)->htt.msdu;
+ struct sk_buff *msdu, *txfrag;
int ret;
- if (ATH10K_SKB_CB(txdesc)->htt.refcount == 0)
- return;
-
- ATH10K_SKB_CB(txdesc)->htt.refcount--;
+ ath10k_dbg(ATH10K_DBG_HTT, "htt tx completion msdu_id %u discard %d no_ack %d\n",
+ tx_done->msdu_id, !!tx_done->discard, !!tx_done->no_ack);
- if (ATH10K_SKB_CB(txdesc)->htt.refcount > 0)
+ if (tx_done->msdu_id >= htt->max_num_pending_tx) {
+ ath10k_warn("warning: msdu_id %d too big, ignoring\n",
+ tx_done->msdu_id);
return;
+ }
+
+ msdu = htt->pending_tx[tx_done->msdu_id];
+ txfrag = ATH10K_SKB_CB(msdu)->htt.txfrag;
if (txfrag) {
ret = ath10k_skb_unmap(dev, txfrag);
@@ -76,7 +80,7 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc)
info = IEEE80211_SKB_CB(msdu);
- if (ATH10K_SKB_CB(txdesc)->htt.discard) {
+ if (tx_done->discard) {
ieee80211_free_txskb(htt->ar->hw, msdu);
goto exit;
}
@@ -84,7 +88,7 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc)
if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
info->flags |= IEEE80211_TX_STAT_ACK;
- if (ATH10K_SKB_CB(txdesc)->htt.no_ack)
+ if (tx_done->no_ack)
info->flags &= ~IEEE80211_TX_STAT_ACK;
ieee80211_tx_status(htt->ar->hw, msdu);
@@ -92,36 +96,12 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc)
exit:
spin_lock_bh(&htt->tx_lock);
- htt->pending_tx[ATH10K_SKB_CB(txdesc)->htt.msdu_id] = NULL;
- ath10k_htt_tx_free_msdu_id(htt, ATH10K_SKB_CB(txdesc)->htt.msdu_id);
+ htt->pending_tx[tx_done->msdu_id] = NULL;
+ ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id);
__ath10k_htt_tx_dec_pending(htt);
if (htt->num_pending_tx == 0)
wake_up(&htt->empty_tx_wq);
spin_unlock_bh(&htt->tx_lock);
-
- dev_kfree_skb_any(txdesc);
-}
-
-void ath10k_txrx_tx_completed(struct ath10k_htt *htt,
- const struct htt_tx_done *tx_done)
-{
- struct sk_buff *txdesc;
-
- ath10k_dbg(ATH10K_DBG_HTT, "htt tx completion msdu_id %u discard %d no_ack %d\n",
- tx_done->msdu_id, !!tx_done->discard, !!tx_done->no_ack);
-
- if (tx_done->msdu_id >= htt->max_num_pending_tx) {
- ath10k_warn("warning: msdu_id %d too big, ignoring\n",
- tx_done->msdu_id);
- return;
- }
-
- txdesc = htt->pending_tx[tx_done->msdu_id];
-
- ATH10K_SKB_CB(txdesc)->htt.discard = tx_done->discard;
- ATH10K_SKB_CB(txdesc)->htt.no_ack = tx_done->no_ack;
-
- ath10k_txrx_tx_unref(htt, txdesc);
}
static const u8 rx_legacy_rate_idx[] = {
diff --git a/drivers/net/wireless/ath/ath10k/txrx.h b/drivers/net/wireless/ath/ath10k/txrx.h
index e78632a..356dc9c 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.h
+++ b/drivers/net/wireless/ath/ath10k/txrx.h
@@ -19,9 +19,8 @@
#include "htt.h"
-void ath10k_txrx_tx_unref(struct ath10k_htt *htt, struct sk_buff *txdesc);
-void ath10k_txrx_tx_completed(struct ath10k_htt *htt,
- const struct htt_tx_done *tx_done);
+void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
+ const struct htt_tx_done *tx_done);
void ath10k_process_rx(struct ath10k *ar, struct htt_rx_info *info);
struct ath10k_peer *ath10k_peer_find(struct ath10k *ar, int vdev_id,
--
1.7.9.5
It's more efficient to simply check the num_pending_tx
value instead of traversing the whole bitmap of
msdu ids.
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/mac.c | 3 +--
drivers/net/wireless/ath/ath10k/txrx.c | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 11aa13e..5a56833 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2853,8 +2853,7 @@ static void ath10k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
bool empty;
spin_lock_bh(&ar->htt.tx_lock);
- empty = bitmap_empty(ar->htt.used_msdu_ids,
- ar->htt.max_num_pending_tx);
+ empty = (ar->htt.num_pending_tx == 0);
spin_unlock_bh(&ar->htt.tx_lock);
skip = (ar->state == ATH10K_STATE_WEDGED);
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 68b6fae..37b8196 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -96,7 +96,7 @@ exit:
htt->pending_tx[ATH10K_SKB_CB(txdesc)->htt.msdu_id] = NULL;
ath10k_htt_tx_free_msdu_id(htt, ATH10K_SKB_CB(txdesc)->htt.msdu_id);
__ath10k_htt_tx_dec_pending(htt);
- if (bitmap_empty(htt->used_msdu_ids, htt->max_num_pending_tx))
+ if (htt->num_pending_tx == 0)
wake_up(&htt->empty_tx_wq);
spin_unlock_bh(&htt->tx_lock);
--
1.7.9.5
Instead of allocating an sk_buff for a mere 16-byte
tx fragment list buffer, use the headroom of the
original msdu sk_buff.
This decreases CPU cache pressure and improves
performance.
Measured improvement on AP135 is 560mbps ->
590mbps of UDP TX bridging traffic.
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/core.h | 4 +-
drivers/net/wireless/ath/ath10k/htt_tx.c | 82 +++++++++++++++---------------
drivers/net/wireless/ath/ath10k/mac.c | 4 ++
drivers/net/wireless/ath/ath10k/txrx.c | 16 +++---
4 files changed, 53 insertions(+), 53 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 4563f80..292ad45 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -54,7 +54,9 @@ struct ath10k_skb_cb {
u8 vdev_id;
u8 tid;
bool is_offchan;
- struct sk_buff *txfrag;
+
+ u8 frag_len;
+ u8 pad_len;
} __packed htt;
} __packed;
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 5e738d8..3b93c6a 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -307,7 +307,8 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
struct device *dev = htt->ar->dev;
struct sk_buff *txdesc = NULL;
struct htt_cmd *cmd;
- u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
+ struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
+ u8 vdev_id = skb_cb->htt.vdev_id;
int len = 0;
int msdu_id = -1;
int res;
@@ -350,6 +351,9 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
memcpy(cmd->mgmt_tx.hdr, msdu->data,
min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN));
+ skb_cb->htt.frag_len = 0;
+ skb_cb->htt.pad_len = 0;
+
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
goto err_unmap_msdu;
@@ -377,13 +381,12 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
struct htt_cmd *cmd;
struct htt_data_tx_desc_frag *tx_frags;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
+ struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
struct sk_buff *txdesc = NULL;
- struct sk_buff *txfrag = NULL;
bool use_frags;
u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
u8 tid;
- int prefetch_len, desc_len, frag_len;
- dma_addr_t frags_paddr;
+ int prefetch_len, desc_len;
int msdu_id = -1;
int res;
u8 flags0;
@@ -407,7 +410,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
prefetch_len = roundup(prefetch_len, 4);
desc_len = sizeof(cmd->hdr) + sizeof(cmd->data_tx) + prefetch_len;
- frag_len = sizeof(*tx_frags) * 2;
txdesc = ath10k_htc_alloc_skb(desc_len);
if (!txdesc) {
@@ -421,41 +423,44 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
use_frags = htt->target_version_major < 3 ||
!ieee80211_is_mgmt(hdr->frame_control);
- if (use_frags) {
- txfrag = dev_alloc_skb(frag_len);
- if (!txfrag) {
- res = -ENOMEM;
- goto err_free_txdesc;
- }
- }
-
if (!IS_ALIGNED((unsigned long)txdesc->data, 4)) {
ath10k_warn("htt alignment check failed. dropping packet.\n");
res = -EIO;
- goto err_free_txfrag;
+ goto err_free_txdesc;
+ }
+
+ if (use_frags) {
+ skb_cb->htt.frag_len = sizeof(*tx_frags) * 2;
+ skb_cb->htt.pad_len = (unsigned long)msdu->data -
+ round_down((unsigned long)msdu->data, 4);
+
+ skb_push(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
+ } else {
+ skb_cb->htt.frag_len = 0;
+ skb_cb->htt.pad_len = 0;
}
res = ath10k_skb_map(dev, msdu);
if (res)
- goto err_free_txfrag;
+ goto err_pull_txfrag;
if (use_frags) {
+ dma_sync_single_for_cpu(dev, skb_cb->paddr, msdu->len,
+ DMA_TO_DEVICE);
+
/* tx fragment list must be terminated with zero-entry */
- skb_put(txfrag, frag_len);
- tx_frags = (struct htt_data_tx_desc_frag *)txfrag->data;
- tx_frags[0].paddr = __cpu_to_le32(ATH10K_SKB_CB(msdu)->paddr);
- tx_frags[0].len = __cpu_to_le32(msdu->len);
+ tx_frags = (struct htt_data_tx_desc_frag *)msdu->data;
+ tx_frags[0].paddr = __cpu_to_le32(skb_cb->paddr +
+ skb_cb->htt.frag_len +
+ skb_cb->htt.pad_len);
+ tx_frags[0].len = __cpu_to_le32(msdu->len -
+ skb_cb->htt.frag_len -
+ skb_cb->htt.pad_len);
tx_frags[1].paddr = __cpu_to_le32(0);
tx_frags[1].len = __cpu_to_le32(0);
- res = ath10k_skb_map(dev, txfrag);
- if (res)
- goto err_unmap_msdu;
-
- ath10k_dbg(ATH10K_DBG_HTT, "txfrag 0x%llx\n",
- (unsigned long long) ATH10K_SKB_CB(txfrag)->paddr);
- ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "txfrag: ",
- txfrag->data, frag_len);
+ dma_sync_single_for_device(dev, skb_cb->paddr, msdu->len,
+ DMA_TO_DEVICE);
}
ath10k_dbg(ATH10K_DBG_HTT, "msdu 0x%llx\n",
@@ -488,35 +493,28 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD;
flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD;
- if (use_frags)
- frags_paddr = ATH10K_SKB_CB(txfrag)->paddr;
- else
- frags_paddr = ATH10K_SKB_CB(msdu)->paddr;
-
cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
cmd->data_tx.flags0 = flags0;
cmd->data_tx.flags1 = __cpu_to_le16(flags1);
- cmd->data_tx.len = __cpu_to_le16(msdu->len);
+ cmd->data_tx.len = __cpu_to_le16(msdu->len -
+ skb_cb->htt.frag_len -
+ skb_cb->htt.pad_len);
cmd->data_tx.id = __cpu_to_le16(msdu_id);
- cmd->data_tx.frags_paddr = __cpu_to_le32(frags_paddr);
+ cmd->data_tx.frags_paddr = __cpu_to_le32(skb_cb->paddr);
cmd->data_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID);
- memcpy(cmd->data_tx.prefetch, msdu->data, prefetch_len);
+ memcpy(cmd->data_tx.prefetch, hdr, prefetch_len);
res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
if (res)
- goto err_restore;
+ goto err_unmap_msdu;
return 0;
-err_restore:
- if (use_frags)
- ath10k_skb_unmap(dev, txfrag);
err_unmap_msdu:
ath10k_skb_unmap(dev, msdu);
-err_free_txfrag:
- if (use_frags)
- dev_kfree_skb_any(txfrag);
+err_pull_txfrag:
+ skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
err_free_txdesc:
dev_kfree_skb_any(txdesc);
err_free_msdu_id:
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 9112e6d6f..99a9bad 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3342,6 +3342,10 @@ int ath10k_mac_register(struct ath10k *ar)
IEEE80211_HW_WANT_MONITOR_VIF |
IEEE80211_HW_AP_LINK_PS;
+ /* MSDU can have HTT TX fragment pushed in front. The additional 4
+ * bytes is used for padding/alignment if necessary. */
+ ar->hw->extra_tx_headroom += sizeof(struct htt_data_tx_desc_frag)*2 + 4;
+
if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS)
ar->hw->flags |= IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS;
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 15395af..57931d0 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -49,7 +49,8 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
{
struct device *dev = htt->ar->dev;
struct ieee80211_tx_info *info;
- struct sk_buff *msdu, *txfrag;
+ struct ath10k_skb_cb *skb_cb;
+ struct sk_buff *msdu;
int ret;
ath10k_dbg(ATH10K_DBG_HTT, "htt tx completion msdu_id %u discard %d no_ack %d\n",
@@ -62,20 +63,15 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
}
msdu = htt->pending_tx[tx_done->msdu_id];
- txfrag = ATH10K_SKB_CB(msdu)->htt.txfrag;
-
- if (txfrag) {
- ret = ath10k_skb_unmap(dev, txfrag);
- if (ret)
- ath10k_warn("txfrag unmap failed (%d)\n", ret);
-
- dev_kfree_skb_any(txfrag);
- }
+ skb_cb = ATH10K_SKB_CB(msdu);
ret = ath10k_skb_unmap(dev, msdu);
if (ret)
ath10k_warn("data skb unmap failed (%d)\n", ret);
+ if (skb_cb->htt.frag_len)
+ skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
+
ath10k_report_offchan_tx(htt->ar, msdu);
info = IEEE80211_SKB_CB(msdu);
--
1.7.9.5