Return-path: Received: from mail-wm0-f48.google.com ([74.125.82.48]:33967 "EHLO mail-wm0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750803AbcAVHrJ convert rfc822-to-8bit (ORCPT ); Fri, 22 Jan 2016 02:47:09 -0500 Received: by mail-wm0-f48.google.com with SMTP id u188so8100662wmu.1 for ; Thu, 21 Jan 2016 23:47:08 -0800 (PST) MIME-Version: 1.0 In-Reply-To: <56A1182A.4060309@codeaurora.org> References: <1453384006-31907-1-git-send-email-michal.kazior@tieto.com> <1453384006-31907-14-git-send-email-michal.kazior@tieto.com> <56A1182A.4060309@codeaurora.org> Date: Fri, 22 Jan 2016 08:47:07 +0100 Message-ID: (sfid-20160122_084713_631850_4AA703B8) Subject: Re: [PATCH 13/13] ath10k: implement push-pull tx From: Michal Kazior To: Peter Oh Cc: "ath10k@lists.infradead.org" , linux-wireless Content-Type: text/plain; charset=UTF-8 Sender: linux-wireless-owner@vger.kernel.org List-ID: On 21 January 2016 at 18:40, Peter Oh wrote: > > On 01/21/2016 05:46 AM, Michal Kazior wrote: >> >> The current/old tx path design was that host, at >> its own leisure, pushed tx frames to the device. >> For HTT there was ~1000-1400 msdu queue depth. >> >> After reaching that limit the driver would request >> mac80211 to stop queues. There was little control >> over what packets got in there as far as >> DA/RA was considered so it was rather easy to >> starve per-station traffic flows. >> >> With MU-MIMO this became a significant problem >> because the queue depth was insufficient to buffer >> frames from multiple clients (which could have >> different signal quality and capabilities) in an >> efficient fashion. >> >> Hence the new tx path in 10.4 was introduced: a >> pull-push mode. >> >> Firmware and host can share tx queue state via >> DMA. The state is logically a 2 dimensional array >> addressed via peer_id+tid pair. Each entry is a >> counter (either number of bytes or packets. 
Host >> keeps it updated and firmware uses it for >> scheduling Tx pull requests to host. >> >> This allows MU-MIMO to become a lot more effective >> with 10+ clients. >> >> Signed-off-by: Michal Kazior >> --- >> drivers/net/wireless/ath/ath10k/core.h | 1 + >> drivers/net/wireless/ath/ath10k/htt.h | 6 ++ >> drivers/net/wireless/ath/ath10k/htt_rx.c | 117 >> ++++++++++++++++++++++++++++--- >> drivers/net/wireless/ath/ath10k/htt_tx.c | 39 ++++++++--- >> drivers/net/wireless/ath/ath10k/mac.c | 48 +++++++++++-- >> drivers/net/wireless/ath/ath10k/mac.h | 5 ++ >> 6 files changed, 192 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/net/wireless/ath/ath10k/core.h >> b/drivers/net/wireless/ath/ath10k/core.h >> index f51887c6be74..ab8cbccc0f4b 100644 >> --- a/drivers/net/wireless/ath/ath10k/core.h >> +++ b/drivers/net/wireless/ath/ath10k/core.h >> @@ -307,6 +307,7 @@ struct ath10k_peer { >> struct ath10k_txq { >> unsigned long num_fw_queued; >> + unsigned long num_push_allowed; >> }; >> struct ath10k_sta { >> diff --git a/drivers/net/wireless/ath/ath10k/htt.h >> b/drivers/net/wireless/ath/ath10k/htt.h >> index b1e40f44e76b..02cf55d306e8 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt.h >> +++ b/drivers/net/wireless/ath/ath10k/htt.h >> @@ -1652,6 +1652,7 @@ struct ath10k_htt { >> struct sk_buff_head tx_compl_q; >> struct sk_buff_head rx_compl_q; >> struct sk_buff_head rx_in_ord_compl_q; >> + struct sk_buff_head tx_fetch_ind_q; >> /* rx_status template */ >> struct ieee80211_rx_status rx_status; >> @@ -1670,8 +1671,10 @@ struct ath10k_htt { >> bool enabled; >> struct htt_q_state *vaddr; >> dma_addr_t paddr; >> + u16 num_push_allowed; >> u16 num_peers; >> u16 num_tids; >> + enum htt_tx_mode_switch_mode mode; >> enum htt_q_depth_type type; >> } tx_q_state; >> }; >> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar, >> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, >> struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_recalc(struct 
ieee80211_hw *hw, >> + struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_sync(struct ath10k *ar); >> void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt, >> bool is_mgmt); >> int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt, >> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c >> b/drivers/net/wireless/ath/ath10k/htt_rx.c >> index 6e3d95c95568..827c8369b589 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c >> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c >> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) >> skb_queue_purge(&htt->tx_compl_q); >> skb_queue_purge(&htt->rx_compl_q); >> skb_queue_purge(&htt->rx_in_ord_compl_q); >> + skb_queue_purge(&htt->tx_fetch_ind_q); >> ath10k_htt_rx_ring_free(htt); >> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) >> skb_queue_head_init(&htt->tx_compl_q); >> skb_queue_head_init(&htt->rx_compl_q); >> skb_queue_head_init(&htt->rx_in_ord_compl_q); >> + skb_queue_head_init(&htt->tx_fetch_ind_q); >> tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, >> (unsigned long)htt); >> @@ -2004,16 +2006,21 @@ static void >> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, >> static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct >> sk_buff >> *skb) >> { >> + struct ieee80211_hw *hw = ar->hw; >> + struct ieee80211_txq *txq; >> struct htt_resp *resp = (struct htt_resp *)skb->data; >> struct htt_tx_fetch_record *record; >> size_t len; >> size_t max_num_bytes; >> size_t max_num_msdus; >> + size_t num_bytes; >> + size_t num_msdus; >> const __le32 *resp_ids; >> u16 num_records; >> u16 num_resp_ids; >> u16 peer_id; >> u8 tid; >> + int ret; >> int i; >> ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n"); >> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> num_records, num_resp_ids, >> le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num)); >> - /* TODO: runtime sanity checks */ >> + if 
(!ar->htt.tx_q_state.enabled) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> not enabled\n"); >> + return; >> + } >> + >> + if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> in push mode\n"); >> + return; >> + } >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_fetch_ind.records[i]; >> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> continue; >> } >> - /* TODO: dequeue and submit tx to device */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + num_msdus = 0; >> + num_bytes = 0; >> + >> + while (num_msdus < max_num_msdus && >> + num_bytes < max_num_bytes) { >> + ret = ath10k_mac_tx_push_txq(hw, txq); >> + if (ret < 0) >> + break; >> + >> + num_msdus++; >> + num_bytes += ret; >> + } >> + >> + record->num_msdus = cpu_to_le16(num_msdus); >> + record->num_bytes = cpu_to_le32(num_bytes); >> + >> + ath10k_htt_tx_txq_recalc(hw, txq); >> } >> + rcu_read_unlock(); >> + >> resp_ids = >> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind); >> ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, >> num_resp_ids); >> - /* TODO: generate and send fetch response to device */ >> + ret = ath10k_htt_tx_fetch_resp(ar, >> + resp->tx_fetch_ind.token, >> + resp->tx_fetch_ind.fetch_seq_num, >> + resp->tx_fetch_ind.records, >> + num_records); >> + if (unlikely(ret)) { >> + ath10k_warn(ar, "failed to submit tx fetch resp for token >> 0x%08x: %d\n", >> + le32_to_cpu(resp->tx_fetch_ind.token), ret); >> + /* FIXME: request fw restart */ >> + } >> + >> + 
ath10k_htt_tx_txq_sync(ar); >> } >> static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar, >> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> { >> const struct htt_resp *resp = (void *)skb->data; >> const struct htt_tx_mode_switch_record *record; >> + struct ieee80211_txq *txq; >> + struct ath10k_txq *artxq; >> size_t len; >> size_t num_records; >> enum htt_tx_mode_switch_mode mode; >> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> if (!enable) >> return; >> - /* TODO: apply configuration */ >> + ar->htt.tx_q_state.enabled = enable; >> + ar->htt.tx_q_state.mode = mode; >> + ar->htt.tx_q_state.num_push_allowed = threshold; >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_mode_switch_ind.records[i]; >> @@ -2168,10 +2234,33 @@ static void >> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar, >> continue; >> } >> - /* TODO: apply configuration */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + artxq = (void *)txq->drv_priv; >> + artxq->num_push_allowed = >> le16_to_cpu(record->num_max_msdus); >> + spin_unlock_bh(&ar->htt.tx_lock); >> } >> - /* TODO: apply configuration */ >> + rcu_read_unlock(); >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING); >> + spin_unlock_bh(&ar->htt.tx_lock); >> + > > Isn't it proved it break Mesh from working? Yes, good point. I'm still working on this - I should've mentioned that in the cover letter. Nonetheless I wanted to get the review process going for this patchset. 
I'll address the mesh problem in v2. Michał