Return-path: Received: from mail-wm0-f48.google.com ([74.125.82.48]:33967 "EHLO mail-wm0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750803AbcAVHrJ convert rfc822-to-8bit (ORCPT ); Fri, 22 Jan 2016 02:47:09 -0500 Received: by mail-wm0-f48.google.com with SMTP id u188so8100662wmu.1 for ; Thu, 21 Jan 2016 23:47:08 -0800 (PST) MIME-Version: 1.0 In-Reply-To: <56A1182A.4060309@codeaurora.org> References: <1453384006-31907-1-git-send-email-michal.kazior@tieto.com> <1453384006-31907-14-git-send-email-michal.kazior@tieto.com> <56A1182A.4060309@codeaurora.org> Date: Fri, 22 Jan 2016 08:47:07 +0100 Message-ID: (sfid-20160122_084713_631850_4AA703B8) Subject: Re: [PATCH 13/13] ath10k: implement push-pull tx From: Michal Kazior To: Peter Oh Cc: "ath10k@lists.infradead.org" , linux-wireless Content-Type: text/plain; charset=UTF-8 Sender: linux-wireless-owner@vger.kernel.org List-ID: On 21 January 2016 at 18:40, Peter Oh wrote: > > On 01/21/2016 05:46 AM, Michal Kazior wrote: >> >> The current/old tx path design was that host, at >> its own leisure, pushed tx frames to the device. >> For HTT there was ~1000-1400 msdu queue depth. >> >> After reaching that limit the driver would request >> mac80211 to stop queues. There was little control >> over what packets got in there as far as >> DA/RA was considered so it was rather easy to >> starve per-station traffic flows. >> >> With MU-MIMO this became a significant problem >> because the queue depth was insufficient to buffer >> frames from multiple clients (which could have >> different signal quality and capabilities) in an >> efficient fashion. >> >> Hence the new tx path in 10.4 was introduced: a >> pull-push mode. >> >> Firmware and host can share tx queue state via >> DMA. The state is logically a 2 dimensional array >> addressed via peer_id+tid pair. Each entry is a >> counter (either number of bytes or packets. 
Host >> keeps it updated and firmware uses it for >> scheduling Tx pull requests to host. >> >> This allows MU-MIMO to become a lot more effective >> with 10+ clients. >> >> Signed-off-by: Michal Kazior >> --- >> drivers/net/wireless/ath/ath10k/core.h | 1 + >> drivers/net/wireless/ath/ath10k/htt.h | 6 ++ >> drivers/net/wireless/ath/ath10k/htt_rx.c | 117 >> ++++++++++++++++++++++++++++--- >> drivers/net/wireless/ath/ath10k/htt_tx.c | 39 ++++++++--- >> drivers/net/wireless/ath/ath10k/mac.c | 48 +++++++++++-- >> drivers/net/wireless/ath/ath10k/mac.h | 5 ++ >> 6 files changed, 192 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/net/wireless/ath/ath10k/core.h >> b/drivers/net/wireless/ath/ath10k/core.h >> index f51887c6be74..ab8cbccc0f4b 100644 >> --- a/drivers/net/wireless/ath/ath10k/core.h >> +++ b/drivers/net/wireless/ath/ath10k/core.h >> @@ -307,6 +307,7 @@ struct ath10k_peer { >> struct ath10k_txq { >> unsigned long num_fw_queued; >> + unsigned long num_push_allowed; >> }; >> struct ath10k_sta { >> diff --git a/drivers/net/wireless/ath/ath10k/htt.h >> b/drivers/net/wireless/ath/ath10k/htt.h >> index b1e40f44e76b..02cf55d306e8 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt.h >> +++ b/drivers/net/wireless/ath/ath10k/htt.h >> @@ -1652,6 +1652,7 @@ struct ath10k_htt { >> struct sk_buff_head tx_compl_q; >> struct sk_buff_head rx_compl_q; >> struct sk_buff_head rx_in_ord_compl_q; >> + struct sk_buff_head tx_fetch_ind_q; >> /* rx_status template */ >> struct ieee80211_rx_status rx_status; >> @@ -1670,8 +1671,10 @@ struct ath10k_htt { >> bool enabled; >> struct htt_q_state *vaddr; >> dma_addr_t paddr; >> + u16 num_push_allowed; >> u16 num_peers; >> u16 num_tids; >> + enum htt_tx_mode_switch_mode mode; >> enum htt_q_depth_type type; >> } tx_q_state; >> }; >> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar, >> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, >> struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_recalc(struct 
ieee80211_hw *hw, >> + struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_sync(struct ath10k *ar); >> void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt, >> bool is_mgmt); >> int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt, >> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c >> b/drivers/net/wireless/ath/ath10k/htt_rx.c >> index 6e3d95c95568..827c8369b589 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c >> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c >> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) >> skb_queue_purge(&htt->tx_compl_q); >> skb_queue_purge(&htt->rx_compl_q); >> skb_queue_purge(&htt->rx_in_ord_compl_q); >> + skb_queue_purge(&htt->tx_fetch_ind_q); >> ath10k_htt_rx_ring_free(htt); >> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) >> skb_queue_head_init(&htt->tx_compl_q); >> skb_queue_head_init(&htt->rx_compl_q); >> skb_queue_head_init(&htt->rx_in_ord_compl_q); >> + skb_queue_head_init(&htt->tx_fetch_ind_q); >> tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, >> (unsigned long)htt); >> @@ -2004,16 +2006,21 @@ static void >> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, >> static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct >> sk_buff >> *skb) >> { >> + struct ieee80211_hw *hw = ar->hw; >> + struct ieee80211_txq *txq; >> struct htt_resp *resp = (struct htt_resp *)skb->data; >> struct htt_tx_fetch_record *record; >> size_t len; >> size_t max_num_bytes; >> size_t max_num_msdus; >> + size_t num_bytes; >> + size_t num_msdus; >> const __le32 *resp_ids; >> u16 num_records; >> u16 num_resp_ids; >> u16 peer_id; >> u8 tid; >> + int ret; >> int i; >> ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n"); >> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> num_records, num_resp_ids, >> le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num)); >> - /* TODO: runtime sanity checks */ >> + if 
(!ar->htt.tx_q_state.enabled) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> not enabled\n"); >> + return; >> + } >> + >> + if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> in push mode\n"); >> + return; >> + } >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_fetch_ind.records[i]; >> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> continue; >> } >> - /* TODO: dequeue and submit tx to device */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + num_msdus = 0; >> + num_bytes = 0; >> + >> + while (num_msdus < max_num_msdus && >> + num_bytes < max_num_bytes) { >> + ret = ath10k_mac_tx_push_txq(hw, txq); >> + if (ret < 0) >> + break; >> + >> + num_msdus++; >> + num_bytes += ret; >> + } >> + >> + record->num_msdus = cpu_to_le16(num_msdus); >> + record->num_bytes = cpu_to_le32(num_bytes); >> + >> + ath10k_htt_tx_txq_recalc(hw, txq); >> } >> + rcu_read_unlock(); >> + >> resp_ids = >> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind); >> ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, >> num_resp_ids); >> - /* TODO: generate and send fetch response to device */ >> + ret = ath10k_htt_tx_fetch_resp(ar, >> + resp->tx_fetch_ind.token, >> + resp->tx_fetch_ind.fetch_seq_num, >> + resp->tx_fetch_ind.records, >> + num_records); >> + if (unlikely(ret)) { >> + ath10k_warn(ar, "failed to submit tx fetch resp for token >> 0x%08x: %d\n", >> + le32_to_cpu(resp->tx_fetch_ind.token), ret); >> + /* FIXME: request fw restart */ >> + } >> + >> + 
ath10k_htt_tx_txq_sync(ar); >> } >> static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar, >> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> { >> const struct htt_resp *resp = (void *)skb->data; >> const struct htt_tx_mode_switch_record *record; >> + struct ieee80211_txq *txq; >> + struct ath10k_txq *artxq; >> size_t len; >> size_t num_records; >> enum htt_tx_mode_switch_mode mode; >> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> if (!enable) >> return; >> - /* TODO: apply configuration */ >> + ar->htt.tx_q_state.enabled = enable; >> + ar->htt.tx_q_state.mode = mode; >> + ar->htt.tx_q_state.num_push_allowed = threshold; >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_mode_switch_ind.records[i]; >> @@ -2168,10 +2234,33 @@ static void >> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar, >> continue; >> } >> - /* TODO: apply configuration */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + artxq = (void *)txq->drv_priv; >> + artxq->num_push_allowed = >> le16_to_cpu(record->num_max_msdus); >> + spin_unlock_bh(&ar->htt.tx_lock); >> } >> - /* TODO: apply configuration */ >> + rcu_read_unlock(); >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING); >> + spin_unlock_bh(&ar->htt.tx_lock); >> + > > Isn't it proved it break Mesh from working? Yes, good point. I'm still working on this - I should've mentioned that in the cover letter. Nonetheless I wanted to get the review process going for this patchset. 
I'll address the mesh problem in v2. Michał