2021-08-04 13:50:32

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list

From: Ben Greear <[email protected]>

This improves performance when sending lots of frames that
request to be mapped to a TXS callback.

Add comments to help the next person understand the tx path
better.

Signed-off-by: Ben Greear <[email protected]>
---

v5: Rebased on top of previous series.

drivers/net/wireless/mediatek/mt76/mt76.h | 48 +++++++---
.../net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
.../net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
.../net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +-
.../net/wireless/mediatek/mt76/mt7915/mac.c | 8 +-
.../net/wireless/mediatek/mt76/mt7921/mac.c | 9 +-
drivers/net/wireless/mediatek/mt76/tx.c | 90 ++++++++++++++++---
7 files changed, 132 insertions(+), 29 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 436bf2b8e2cd..016f563fec39 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);
#define MT_WCID_TX_INFO_TXPWR_ADJ GENMASK(25, 18)
#define MT_WCID_TX_INFO_SET BIT(31)

+#define MT_PACKET_ID_MASK GENMASK(6, 0)
+#define MT_PACKET_ID_NO_ACK 0
+/* Request TXS, but don't try to match with skb. */
+#define MT_PACKET_ID_NO_SKB 1
+#define MT_PACKET_ID_FIRST 2
+#define MT_PACKET_ID_HAS_RATE BIT(7)
+#define MT_PACKET_ID_MAX (GENMASK(7, 0) - 1)
+
struct mt76_wcid {
struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];

@@ -246,6 +254,8 @@ struct mt76_wcid {

struct rate_info rate;

+ struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
+
u16 idx;
u8 hw_key_idx;
u8 hw_key_idx2;
@@ -302,13 +312,8 @@ struct mt76_rx_tid {
#define MT_TX_CB_TXS_DONE BIT(1)
#define MT_TX_CB_TXS_FAILED BIT(2)

-#define MT_PACKET_ID_MASK GENMASK(6, 0)
-#define MT_PACKET_ID_NO_ACK 0
-#define MT_PACKET_ID_NO_SKB 1
-#define MT_PACKET_ID_FIRST 2
-#define MT_PACKET_ID_HAS_RATE BIT(7)
-
-#define MT_TX_STATUS_SKB_TIMEOUT HZ
+/* This is timer for when to give up when waiting for TXS callback. */
+#define MT_TX_STATUS_SKB_TIMEOUT (HZ / 8)

struct mt76_tx_cb {
unsigned long jiffies;
@@ -651,6 +656,7 @@ struct mt76_dev {
spinlock_t cc_lock;

u32 cur_cc_bss_rx;
+ unsigned long next_status_jiffies;

struct mt76_rx_status rx_ampdu_status;
u32 rx_ampdu_len;
@@ -1090,7 +1096,7 @@ struct sk_buff *mt76_tx_status_skb_get(struct mt76_dev *dev,
struct mt76_wcid *wcid, int pktid,
struct sk_buff_head *list);
void mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb,
- struct sk_buff_head *list);
+ struct sk_buff_head *list, struct mt76_wcid *wcid);
void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid, struct sk_buff *skb,
struct list_head *free_list);
static inline void
@@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)
}

static inline int
-mt76_get_next_pkt_id(struct mt76_wcid *wcid)
+mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,
+ struct sk_buff *skb)
{
+ struct sk_buff *qskb;
+
+ lockdep_assert_held(&dev->status_list.lock);
+
wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
- if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
- wcid->packet_id == MT_PACKET_ID_NO_SKB)
+ if (wcid->packet_id < MT_PACKET_ID_FIRST)
wcid->packet_id = MT_PACKET_ID_FIRST;

+ qskb = wcid->skb_status_array[wcid->packet_id];
+ if (qskb) {
+ /* bummer, already waiting on this pid. See if it is stale. */
+ struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);
+
+ if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {
+ /* ok, not stale. Increment pid anyway, will try next
+ * slot next time
+ */
+ return MT_PACKET_ID_NO_SKB;
+ }
+ }
+
+ /* cache this skb for fast lookup by packet-id */
+ wcid->skb_status_array[wcid->packet_id] = skb;
+
return wcid->packet_id;
}
#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 3972c56136a2..2f268eb7c1e6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1230,7 +1230,7 @@ mt7603_mac_add_txs_skb(struct mt7603_dev *dev, struct mt7603_sta *sta, int pid,
info->status.rates[0].idx = -1;
}

- mt76_tx_status_skb_done(mdev, skb, &list);
+ mt76_tx_status_skb_done(mdev, skb, &list, &sta->wcid);
}
mt76_tx_status_unlock(mdev, &list);

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index ff3f85e4087c..381a998817d4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1433,7 +1433,7 @@ static bool mt7615_mac_add_txs_skb(struct mt7615_dev *dev,
info->status.rates[0].idx = -1;
}

- mt76_tx_status_skb_done(mdev, skb, &list);
+ mt76_tx_status_skb_done(mdev, skb, &list, &sta->wcid);
}
mt76_tx_status_unlock(mdev, &list);

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index c32e6dc68773..fce020e64678 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -622,7 +622,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
info = *status.info;
len = status.skb->len;
ac = skb_get_queue_mapping(status.skb);
- mt76_tx_status_skb_done(mdev, status.skb, &list);
+ mt76_tx_status_skb_done(mdev, status.skb, &list, wcid);
} else if (msta) {
len = status.info->status.ampdu_len * ewma_pktlen_read(&msta->pktlen);
ac = FIELD_GET(MT_PKTID_AC, cur_pktid);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index d9f52e2611a7..8f5702981900 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

mt76_tx_status_lock(mdev, &list);
skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+
+ /* TODO: Gather stats anyway, even if we are not matching on an skb. */
if (!skb)
goto out;

@@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
stats->tx_bw[0]++;
break;
}
+
+ /* Cache rate for packets that don't get a TXS callback for some
+ * reason.
+ */
wcid->rate = rate;

out:
- mt76_tx_status_skb_done(mdev, skb, &list);
+ mt76_tx_status_skb_done(mdev, skb, &list, wcid);
mt76_tx_status_unlock(mdev, &list);

return !!skb;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 76985a6b3be5..219c17d77e46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -732,7 +732,9 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi,
txwi[7] |= cpu_to_le32(val);
}

-static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
+static void mt7921_update_txs(struct mt7921_dev *dev,
+ struct mt76_wcid *wcid, __le32 *txwi,
+ struct sk_buff *skb)
{
struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
u32 pid, frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]);
@@ -744,7 +746,7 @@ static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
return;

msta->next_txs_ts = jiffies + msecs_to_jiffies(250);
- pid = mt76_get_next_pkt_id(wcid);
+ pid = mt76_get_next_pkt_id(&dev->mt76, wcid, skb);
txwi[5] |= cpu_to_le32(MT_TXD5_TX_STATUS_MCU |
FIELD_PREP(MT_TXD5_PID, pid));
}
@@ -771,7 +773,6 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_vif *vif = info->control.vif;
- struct mt76_phy *mphy = &dev->mphy;
u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
u16 tx_count = 15;
@@ -839,7 +840,7 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
}

- mt7921_update_txs(wcid, txwi);
+ mt7921_update_txs(dev, wcid, txwi, skb);
}

static void
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 6f302acb6e69..4c8504d3c904 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -36,6 +36,7 @@ mt76_tx_check_agg_ssn(struct ieee80211_sta *sta, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(mt76_tx_check_agg_ssn);

+/* Lock list, and initialize the timed-out-skb list object. */
void
mt76_tx_status_lock(struct mt76_dev *dev, struct sk_buff_head *list)
__acquires(&dev->status_list.lock)
@@ -45,6 +46,9 @@ mt76_tx_status_lock(struct mt76_dev *dev, struct sk_buff_head *list)
}
EXPORT_SYMBOL_GPL(mt76_tx_status_lock);

+/* Unlock list, and use last-received status for any skbs that
+ * timed out getting TXS callback (they are on the list passed in).
+ */
void
mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
__releases(&dev->status_list.lock)
@@ -80,20 +84,39 @@ EXPORT_SYMBOL_GPL(mt76_tx_status_unlock);

static void
__mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb, u8 flags,
- struct sk_buff_head *list)
+ struct sk_buff_head *list, struct mt76_wcid *wcid)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
u8 done = MT_TX_CB_DMA_DONE | MT_TX_CB_TXS_DONE;

+ lockdep_assert_held(&dev->status_list.lock);
+
flags |= cb->flags;
cb->flags = flags;

+ /* Only process the skb once its TXS status has been received and
+ * the txfree (DMA_DONE) callback has happened.
+ */
if ((flags & done) != done)
return;

__skb_unlink(skb, &dev->status_list);

+ rcu_read_lock();
+ /* calling code may not know wcid, for instance in the tx_status_check
+ * path, look it up in that case.
+ */
+ if (!wcid)
+ wcid = rcu_dereference(dev->wcid[cb->wcid]);
+
+ /* Make sure we clear any cached skb. */
+ if (wcid) {
+ if (!(WARN_ON_ONCE(cb->pktid >= ARRAY_SIZE(wcid->skb_status_array))))
+ wcid->skb_status_array[cb->pktid] = NULL;
+ }
+ rcu_read_unlock();
+
/* Tx status can be unreliable. if it fails, mark the frame as ACKed */
if (flags & MT_TX_CB_TXS_FAILED) {
info->status.rates[0].count = 0;
@@ -106,9 +129,9 @@ __mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb, u8 flags,

void
mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb,
- struct sk_buff_head *list)
+ struct sk_buff_head *list, struct mt76_wcid *wcid)
{
- __mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_DONE, list);
+ __mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_DONE, list, wcid);
}
EXPORT_SYMBOL_GPL(mt76_tx_status_skb_done);

@@ -119,6 +142,7 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
int pid;
+ int qlen;

if (!wcid)
return MT_PACKET_ID_NO_ACK;
@@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
return MT_PACKET_ID_NO_SKB;

+ /* due to limited range of the pktid (7 bits), we can only
+ * have a limited number of outstanding frames. I think it is OK to
+ * check the length outside of a lock since it doesn't matter too much
+ * if we read wrong data here.
+ * The TX-status callbacks don't always return a callback for an SKB,
+ * so the status_list may contain some stale skbs. Those will be cleaned
+ * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
+ */
+
+ qlen = skb_queue_len(&dev->status_list);
+ if (qlen > 120)
+ return MT_PACKET_ID_NO_SKB;
+
spin_lock_bh(&dev->status_list.lock);

memset(cb, 0, sizeof(*cb));
- pid = mt76_get_next_pkt_id(wcid);
+ pid = mt76_get_next_pkt_id(dev, wcid, skb);
cb->wcid = wcid->idx;
cb->pktid = pid;
cb->jiffies = jiffies;

- __skb_queue_tail(&dev->status_list, skb);
+ if (cb->pktid != MT_PACKET_ID_NO_SKB)
+ __skb_queue_tail(&dev->status_list, skb);
+
spin_unlock_bh(&dev->status_list.lock);

return pid;
@@ -150,25 +189,56 @@ mt76_tx_status_skb_get(struct mt76_dev *dev, struct mt76_wcid *wcid, int pktid,
struct sk_buff_head *list)
{
struct sk_buff *skb, *tmp;
+ struct sk_buff *rvskb = NULL;
+
+ /* If pktid is < first-valid-id, then it is not something we requested
+ * TXS for, so we will not find SKB. Bail out early in that case,
+ * unless we need to walk due to stale-skb-reaper timeout.
+ */
+ if (pktid < MT_PACKET_ID_FIRST) {
+ if (!time_after(jiffies, dev->next_status_jiffies))
+ return NULL;
+ goto check_list;
+ }
+
+ if (wcid) {
+ lockdep_assert_held(&dev->status_list.lock);
+ if (WARN_ON_ONCE(pktid >= ARRAY_SIZE(wcid->skb_status_array))) {
+ dev_err(dev->dev, "invalid pktid: %d status-array-size: %d\n",
+ pktid, (int)(ARRAY_SIZE(wcid->skb_status_array)));
+ WARN_ON_ONCE(true);
+ goto check_list;
+ }

+ skb = wcid->skb_status_array[pktid];
+
+ if (skb && !time_after(jiffies, dev->next_status_jiffies))
+ return skb;
+ }
+
+check_list:
skb_queue_walk_safe(&dev->status_list, skb, tmp) {
struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);

if (wcid && cb->wcid != wcid->idx)
continue;

- if (cb->pktid == pktid)
- return skb;
+ if (cb->pktid == pktid) {
+ /* Found our skb, but check for timeouts too */
+ rvskb = skb;
+ continue;
+ }

if (pktid >= 0 && !time_after(jiffies, cb->jiffies +
MT_TX_STATUS_SKB_TIMEOUT))
continue;

__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_FAILED |
- MT_TX_CB_TXS_DONE, list);
+ MT_TX_CB_TXS_DONE, list, wcid);
}
+ dev->next_status_jiffies = jiffies + MT_TX_STATUS_SKB_TIMEOUT + 1;

- return NULL;
+ return rvskb;
}
EXPORT_SYMBOL_GPL(mt76_tx_status_skb_get);

@@ -238,7 +308,7 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *
}

mt76_tx_status_lock(dev, &list);
- __mt76_tx_status_skb_done(dev, skb, MT_TX_CB_DMA_DONE, &list);
+ __mt76_tx_status_skb_done(dev, skb, MT_TX_CB_DMA_DONE, &list, wcid);
mt76_tx_status_unlock(dev, &list);

out:
--
2.20.1


2021-08-04 13:50:51

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 04/11] mt76: mt7915: allow processing TXS for 'NO_SKB' pkt-ids

From: Ben Greear <[email protected]>

This will let us update stats and wcid.rate for every TXS
callback we receive for a particular wcid.

For now, the TXS is not requested for NO_SKB frames, however.
That will be allowed in the next patch.

Signed-off-by: Ben Greear <[email protected]>
---
.../net/wireless/mediatek/mt76/mt7915/mac.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 6f92e207680f..2228dad71657 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1047,6 +1047,7 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
txwi[4] = 0;

val = FIELD_PREP(MT_TXD5_PID, pid);
+ /* NOTE: mt7916 does NOT request TXS for 'NO_SKB' frames by default. */
if (pid >= MT_PACKET_ID_FIRST)
val |= MT_TXD5_TX_STATUS_HOST;
txwi[5] = cpu_to_le32(val);
@@ -1430,10 +1431,16 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
* paired with TXS data. This is normal datapath.
*/
struct rate_info *rate = &wcid->rate;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ bool check_status;

- mt76_tx_status_lock(mdev, &list);
- skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+ check_status = ((pid >= MT_PACKET_ID_FIRST) ||
+ time_after(jiffies, mdev->next_status_jiffies));
+
+ if (check_status) {
+ mt76_tx_status_lock(mdev, &list);
+ skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+ }

memset(rate, 0, sizeof(*rate));

@@ -1447,7 +1454,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
if (skb)
mt76_tx_status_skb_done(mdev, skb, &list, wcid);

- mt76_tx_status_unlock(mdev, &list);
+ if (check_status)
+ mt76_tx_status_unlock(mdev, &list);
}

static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data)
@@ -1469,7 +1477,7 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data)
txs = le32_to_cpu(txs_data[3]);
pid = FIELD_GET(MT_TXS3_PID, txs);

- if (pid < MT_PACKET_ID_FIRST)
+ if (pid < MT_PACKET_ID_NO_SKB)
return;

if (wcidx >= MT7915_WTBL_SIZE)
--
2.20.1

2021-08-04 13:51:12

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 09/11] mt76: mt7915: txfree status to show txcount instead of latency

From: Ben Greear <[email protected]>

Latency is not obviously that useful, but txcount can let us deduce
retries, which may be more interesting.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt7915/init.c | 3 +++
drivers/net/wireless/mediatek/mt76/mt7915/mac.h | 8 +++++---
drivers/net/wireless/mediatek/mt76/mt7915/regs.h | 4 ++++
3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index f174cf219724..a0d282771d77 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -304,6 +304,9 @@ static void mt7915_mac_init(struct mt7915_dev *dev)
/* enable hardware de-agg */
mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);

+ /* disable Tx latency report to enable Tx count in txfree path */
+ mt76_clear(dev, MT_PLE_HOST_RPT0, MT_PLE_HOST_RPT0_TX_LATENCY);
+
for (i = 0; i < MT7915_WTBL_SIZE; i++)
mt7915_mac_wtbl_update(dev, i,
MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 65dba1061376..3f5a80158866 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -304,13 +304,15 @@ struct mt7915_tx_free {

#define MT_TX_FREE_MSDU_CNT GENMASK(9, 0)
#define MT_TX_FREE_WLAN_ID GENMASK(23, 14)
+/* when configured for txfree latency mode. See MT_PLE_HOST_RPT0_TX_LATENCY
+ * Not enabled by default now.
+ */
#define MT_TX_FREE_LATENCY GENMASK(12, 0)
-/* 0: success, others: dropped */
+/* when configured for txcount mode. See MT_PLE_HOST_RPT0_TX_LATENCY. */
+#define MT_TX_FREE_TXCNT GENMASK(12, 0)
#define MT_TX_FREE_STATUS GENMASK(14, 13)
#define MT_TX_FREE_MSDU_ID GENMASK(30, 16)
#define MT_TX_FREE_PAIR BIT(31)
-/* will support this field in further revision */
-#define MT_TX_FREE_RATE GENMASK(13, 0)

#define MT_TXS0_FIXED_RATE BIT(31)
#define MT_TXS0_BW GENMASK(30, 29)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index 62cc32a098fc..ac4d233b8cf2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -22,6 +22,10 @@
#define MT_PLE_BASE 0x8000
#define MT_PLE(ofs) (MT_PLE_BASE + (ofs))

+/* Modify whether txfree struct returns latency or txcount. */
+#define MT_PLE_HOST_RPT0 MT_PLE(0x030)
+#define MT_PLE_HOST_RPT0_TX_LATENCY BIT(3)
+
#define MT_PLE_FL_Q0_CTRL MT_PLE(0x1b0)
#define MT_PLE_FL_Q1_CTRL MT_PLE(0x1b4)
#define MT_PLE_FL_Q2_CTRL MT_PLE(0x1b8)
--
2.20.1

2021-08-04 13:51:14

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 08/11] mt76: mt7915: fix SGI reporting when using tx-overrides

From: Ben Greear <[email protected]>

The station wtbl logic to read rate-ctrl settings does not work when
fixed rates are used. So, read sgi settings from the txo configuration
in this case.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 01f4aa7a6e88..6dd86dbe3e08 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -176,6 +176,15 @@ static void mt7915_mac_sta_poll(struct mt7915_dev *dev)
rx_cur);
}

+ /* If we are in tx-override mode, then wtbl doesn't provide useful report
+ * for the SGI/LGI stuff, so just get it from the override struct.
+ */
+ if (msta->test.txo_active) {
+ msta->wcid.rate_he_gi = msta->test.tx_rate_sgi;
+ msta->wcid.rate_short_gi = msta->test.tx_rate_sgi;
+ continue;
+ }
+
/*
* We don't support reading GI info from txs packets.
* For accurate tx status reporting and AQL improvement,
--
2.20.1

2021-08-04 13:51:14

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 07/11] mt76: mt7915: add support for tx-overrides

From: Ben Greear <[email protected]>

Allow setting a fixed rate on transmit without using the full
testmode logic.

txpower, dynbw, and retry count are not currently supported.
Later, we will probably also need additional logic to avoid applying
this txo to non-data frames and to smaller frames, so that ARP and
such can go through while test data frames are still forced to
arbitrary tx-rates (rates which very well may not be received by
the peer).

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 4 +
.../wireless/mediatek/mt76/mt7915/debugfs.c | 224 ++++++++++++++++++
.../net/wireless/mediatek/mt76/mt7915/mac.c | 98 ++++++--
.../wireless/mediatek/mt76/mt7915/mt7915.h | 2 +
4 files changed, 302 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 0cbb4940f590..52f5adaee2c2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -566,6 +566,7 @@ struct mt76_testmode_ops {

struct mt76_testmode_data {
enum mt76_testmode_state state;
+ u8 txo_active; /* tx overrides are active */

u32 param_set[DIV_ROUND_UP(NUM_MT76_TM_ATTRS, 32)];
struct sk_buff *tx_skb;
@@ -580,6 +581,9 @@ struct mt76_testmode_data {
u8 tx_rate_ldpc;
u8 tx_rate_stbc;
u8 tx_ltf;
+ u8 txbw; /* specify TX bandwidth: 0 20Mhz, 1 40Mhz, 2 80Mhz, 3 160Mhz */
+ u8 tx_xmit_count; /* 0 means no-ack, 1 means one transmit, etc */
+ u8 tx_dynbw; /* 0: dynamic bw disabled, 1: dynamic bw enabled */

u8 tx_antenna_mask;
u8 tx_spe_idx;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 6be194f16548..f2ff0d3f52cd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -109,6 +109,228 @@ mt7915_fw_debug_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(fops_fw_debug, mt7915_fw_debug_get,
mt7915_fw_debug_set, "%lld\n");

+struct mt7915_txo_worker_info {
+ char* buf;
+ int sofar;
+ int size;
+};
+
+static void mt7915_txo_worker(void *wi_data, struct ieee80211_sta *sta)
+{
+ struct mt7915_txo_worker_info *wi = wi_data;
+ struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+ struct mt76_testmode_data *td = &msta->test;
+ struct ieee80211_vif *vif;
+ struct wireless_dev *wdev;
+
+ if (wi->sofar >= wi->size)
+ return; /* buffer is full */
+
+ vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv);
+ wdev = ieee80211_vif_to_wdev(vif);
+
+ wi->sofar += scnprintf(wi->buf + wi->sofar, wi->size - wi->sofar,
+ "vdev (%s) active=%d tpc=%d sgi=%d mcs=%d nss=%d"
+ " pream=%d retries=%d dynbw=%d bw=%d\n",
+ wdev->netdev->name,
+ td->txo_active, td->tx_power[0],
+ td->tx_rate_sgi, td->tx_rate_idx,
+ td->tx_rate_nss, td->tx_rate_mode,
+ td->tx_xmit_count, td->tx_dynbw,
+ td->txbw);
+}
+
+static ssize_t mt7915_read_set_rate_override(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct mt7915_dev *dev = file->private_data;
+ struct ieee80211_hw *hw = dev->mphy.hw;
+ char *buf2;
+ int size = 8000;
+ int rv, sofar;
+ struct mt7915_txo_worker_info wi;
+ const char buf[] =
+ "This allows specify specif tx rate parameters for all DATA"
+ " frames on a vdev\n"
+ "To set a value, you specify the dev-name and key-value pairs:\n"
+ "tpc=10 sgi=1 mcs=x nss=x pream=x retries=x dynbw=0|1 bw=x enable=0|1\n"
+ "pream: 0=cck, 1=ofdm, 2=HT, 3=VHT, 4=HE_SU\n"
+ "cck-mcs: 0=1Mbps, 1=2Mbps, 3=5.5Mbps, 3=11Mbps\n"
+ "ofdm-mcs: 0=6Mbps, 1=9Mbps, 2=12Mbps, 3=18Mbps, 4=24Mbps, 5=36Mbps,"
+ " 6=48Mbps, 7=54Mbps\n"
+ "tpc is not implemented currently, bw is 0-3 for 20-160\n"
+ " For example, wlan0:\n"
+ "echo \"wlan0 tpc=255 sgi=1 mcs=0 nss=1 pream=3 retries=1 dynbw=0 bw=0"
+ " active=1\" > ...mt76/set_rate_override\n";
+
+ buf2 = kzalloc(size, GFP_KERNEL);
+ if (!buf2)
+ return -ENOMEM;
+ strcpy(buf2, buf);
+ sofar = strlen(buf2);
+
+ wi.sofar = sofar;
+ wi.buf = buf2;
+ wi.size = size;
+
+ ieee80211_iterate_stations_atomic(hw, mt7915_txo_worker, &wi);
+
+ rv = simple_read_from_buffer(user_buf, count, ppos, buf2, wi.sofar);
+ kfree(buf2);
+ return rv;
+}
+
+/* Set the rates for specific types of traffic.
+ */
+static ssize_t mt7915_write_set_rate_override(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct mt7915_dev *dev = file->private_data;
+ struct mt7915_sta *msta;
+ struct ieee80211_vif *vif;
+ struct mt76_testmode_data *td = NULL;
+ struct wireless_dev *wdev;
+ struct mt76_wcid *wcid;
+ struct mt76_phy *mphy = &dev->mt76.phy;
+ char buf[180];
+ char tmp[20];
+ char *tok;
+ int ret, i, j;
+ unsigned int vdev_id = 0xFFFF;
+ char *bufptr = buf;
+ long rc;
+ char dev_name_match[IFNAMSIZ + 2];
+
+ memset(buf, 0, sizeof(buf));
+
+ simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
+
+ /* make sure that buf is null terminated */
+ buf[sizeof(buf) - 1] = 0;
+
+ /* drop the possible '\n' from the end */
+ if (buf[count - 1] == '\n')
+ buf[count - 1] = 0;
+
+ mutex_lock(&mphy->dev->mutex);
+
+ /* Ignore empty lines, 'echo' appends them sometimes at least. */
+ if (buf[0] == 0) {
+ ret = count;
+ goto exit;
+ }
+
+ /* String starts with vdev name, ie 'wlan0' Find the proper vif that
+ * matches the name.
+ */
+ for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid_mask); i++) {
+ u32 mask = dev->mt76.wcid_mask[i];
+ u32 phy_mask = dev->mt76.wcid_phy_mask[i];
+
+ if (!mask)
+ continue;
+
+ for (j = i * 32; mask; j++, mask >>= 1, phy_mask >>= 1) {
+ if (!(mask & 1))
+ continue;
+
+ wcid = rcu_dereference(dev->mt76.wcid[j]);
+ if (!wcid)
+ continue;
+
+ msta = container_of(wcid, struct mt7915_sta, wcid);
+
+ vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv);
+
+ wdev = ieee80211_vif_to_wdev(vif);
+
+ if (!wdev)
+ continue;
+
+ snprintf(dev_name_match, sizeof(dev_name_match) - 1, "%s ",
+ wdev->netdev->name);
+
+ if (strncmp(dev_name_match, buf, strlen(dev_name_match)) == 0) {
+ vdev_id = j;
+ td = &msta->test;
+ bufptr = buf + strlen(dev_name_match) - 1;
+ break;
+ }
+ }
+ }
+
+ if (vdev_id == 0xFFFF) {
+ if (strstr(buf, "active=0")) {
+ /* Ignore, we are disabling it anyway */
+ ret = count;
+ goto exit;
+ } else {
+ dev_info(dev->mt76.dev,
+ "mt7915: set-rate-override, unknown netdev name: %s\n", buf);
+ }
+ ret = -EINVAL;
+ goto exit;
+ }
+
+#define MT7915_PARSE_LTOK(a, b) \
+ do { \
+ tok = strstr(bufptr, " " #a "="); \
+ if (tok) { \
+ char *tspace; \
+ tok += 1; /* move past initial space */ \
+ strncpy(tmp, tok + strlen(#a "="), sizeof(tmp) - 1); \
+ tmp[sizeof(tmp) - 1] = 0; \
+ tspace = strstr(tmp, " "); \
+ if (tspace) \
+ *tspace = 0; \
+ if (kstrtol(tmp, 0, &rc) != 0) \
+ dev_info(dev->mt76.dev, \
+ "mt7915: set-rate-override: " #a \
+ "= could not be parsed, tmp: %s\n", \
+ tmp); \
+ else \
+ td->b = rc; \
+ } \
+ } while (0)
+
+ /* TODO: Allow configuring LTF? */
+ td->tx_ltf = 1; /* 0: HTLTF 3.2us, 1: HELTF, 6.4us, 2 HELTF 12,8us */
+
+ MT7915_PARSE_LTOK(tpc, tx_power[0]);
+ MT7915_PARSE_LTOK(sgi, tx_rate_sgi);
+ MT7915_PARSE_LTOK(mcs, tx_rate_idx);
+ MT7915_PARSE_LTOK(nss, tx_rate_nss);
+ MT7915_PARSE_LTOK(pream, tx_rate_mode);
+ MT7915_PARSE_LTOK(retries, tx_xmit_count);
+ MT7915_PARSE_LTOK(dynbw, tx_dynbw);
+ MT7915_PARSE_LTOK(bw, txbw);
+ MT7915_PARSE_LTOK(active, txo_active);
+
+ dev_info(dev->mt76.dev,
+ "mt7915: set-rate-overrides, vdev %i(%s) active=%d tpc=%d sgi=%d mcs=%d"
+ " nss=%d pream=%d retries=%d dynbw=%d bw=%d\n",
+ vdev_id, dev_name_match,
+ td->txo_active, td->tx_power[0], td->tx_rate_sgi, td->tx_rate_idx,
+ td->tx_rate_nss, td->tx_rate_mode, td->tx_xmit_count, td->tx_dynbw,
+ td->txbw);
+
+ ret = count;
+
+exit:
+ mutex_unlock(&mphy->dev->mutex);
+ return ret;
+}
+
+static const struct file_operations fops_set_rate_override = {
+ .read = mt7915_read_set_rate_override,
+ .write = mt7915_write_set_rate_override,
+ .open = simple_open,
+ .owner = THIS_MODULE,
+ .llseek = default_llseek,
+};
+
static int
mt7915_txs_for_no_skb_set(void *data, u64 val)
{
@@ -377,6 +599,8 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
debugfs_create_file("ser_trigger", 0200, dir, dev, &fops_ser_trigger);
debugfs_create_devm_seqfile(dev->mt76.dev, "txpower_sku", dir,
mt7915_read_rate_txpower);
+ debugfs_create_file("set_rate_override", 0600, dir,
+ dev, &fops_set_rate_override);

return 0;
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index bdcd1aae10d1..01f4aa7a6e88 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -757,21 +757,31 @@ mt7915_mac_fill_rx_vector(struct mt7915_dev *dev, struct sk_buff *skb)
}

static void
-mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
+mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, struct mt76_wcid *wcid, __le32 *txwi,
struct sk_buff *skb)
{
-#ifdef CONFIG_NL80211_TESTMODE
- struct mt76_testmode_data *td = &phy->mt76->test;
+ struct mt76_testmode_data *td;
const struct ieee80211_rate *r;
- u8 bw, mode, nss = td->tx_rate_nss;
- u8 rate_idx = td->tx_rate_idx;
+ struct mt7915_sta *msta;
+ u8 bw, mode, nss;
+ u8 rate_idx;
u16 rateval = 0;
u32 val;
bool cck = false;
int band;

- if (skb != phy->mt76->test.tx_skb)
- return;
+ msta = container_of(wcid, struct mt7915_sta, wcid);
+
+ if (msta->test.txo_active) {
+ td = &msta->test;
+ } else {
+ if (skb != phy->mt76->test.tx_skb)
+ return;
+ td = &phy->mt76->test;
+ }
+
+ nss = td->tx_rate_nss;
+ rate_idx = td->tx_rate_idx;

switch (td->tx_rate_mode) {
case MT76_TM_TX_MODE_HT:
@@ -812,20 +822,24 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
break;
}

- switch (phy->mt76->chandef.width) {
- case NL80211_CHAN_WIDTH_40:
- bw = 1;
- break;
- case NL80211_CHAN_WIDTH_80:
- bw = 2;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- case NL80211_CHAN_WIDTH_160:
- bw = 3;
- break;
- default:
- bw = 0;
- break;
+ if (msta->test.txo_active) {
+ bw = td->txbw;
+ } else {
+ switch (phy->mt76->chandef.width) {
+ case NL80211_CHAN_WIDTH_40:
+ bw = 1;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ bw = 2;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ bw = 3;
+ break;
+ default:
+ bw = 0;
+ break;
+ }
}

if (td->tx_rate_stbc && nss == 1) {
@@ -837,12 +851,17 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
FIELD_PREP(MT_TX_RATE_MODE, mode) |
FIELD_PREP(MT_TX_RATE_NSS, nss - 1);

+ /* TODO: Support per-skb txpower, p.15 of txpower doc, DW2 29:24. */
txwi[2] |= cpu_to_le32(MT_TXD2_FIX_RATE);

+ /* Looks like this sets tx attempt to exactly 1.
+ * TODO: Use td->tx_xmit_count, if in txo mode.
+ */
le32p_replace_bits(&txwi[3], 1, MT_TXD3_REM_TX_COUNT);
if (td->tx_rate_mode < MT76_TM_TX_MODE_HT)
txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);

+ /* TODO: Take tx_dynbw into account in txo mode. */
val = MT_TXD6_FIXED_BW |
FIELD_PREP(MT_TXD6_BW, bw) |
FIELD_PREP(MT_TXD6_TX_RATE, rateval) |
@@ -866,9 +885,29 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,

txwi[3] &= ~cpu_to_le32(MT_TXD3_SN_VALID);
txwi[6] |= cpu_to_le32(val);
- txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX,
- phy->test.spe_idx));
-#endif
+
+ if (msta->test.txo_active) {
+ /* see mt7915_tm_set_tx_frames */
+ static const u8 spe_idx_map[] = {0, 0, 1, 0, 3, 2, 4, 0,
+ 9, 8, 6, 10, 16, 12, 18, 0};
+ u32 spe_idx;
+
+ if (td->tx_spe_idx) {
+ spe_idx = td->tx_spe_idx;
+ } else {
+ u8 tx_ant = td->tx_antenna_mask;
+
+ if (!tx_ant) {
+ /* use antenna mask that matches our nss */
+ tx_ant = GENMASK(nss - 1, 0);
+ }
+ spe_idx = spe_idx_map[tx_ant];
+ }
+ txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX, spe_idx));
+ } else {
+ txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX,
+ phy->test.spe_idx));
+ }
}

static void
@@ -1078,8 +1117,15 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
}

- if (mt76_testmode_enabled(mphy))
- mt7915_mac_write_txwi_tm(mphy->priv, txwi, skb);
+#ifdef CONFIG_NL80211_TESTMODE
+ {
+ struct mt7915_sta *msta;
+
+ msta = container_of(wcid, struct mt7915_sta, wcid);
+ if (mt76_testmode_enabled(mphy) || msta->test.txo_active)
+ mt7915_mac_write_txwi_tm(mphy->priv, wcid, txwi, skb);
+ }
+#endif
}

int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 1c0a1bf91c1c..ebbb92fd6620 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -92,6 +92,8 @@ struct mt7915_sta {
struct mt7915_sta_stats stats;

struct mt7915_sta_key_conf bip;
+
+ struct mt76_testmode_data test;
};

struct mt7915_vif {
--
2.20.1

2021-08-04 13:51:21

by Ben Greear

[permalink] [raw]
Subject: [PATCH v5 03/11] mt76: mt7915: move TXS parsing to its own method

From: Ben Greear <[email protected]>

This will allow us to also parse TXS when we have no skb on
the status callback list in future patches.

Explicitly cache short_gi and he_gi in wcid; don't try to store
them in the wcid.rate object. This is slightly less confusing and less
fragile when TXS starts parsing lots of frames.

Signed-off-by: Ben Greear <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 5 +
.../net/wireless/mediatek/mt76/mt7915/mac.c | 122 ++++++++++--------
2 files changed, 74 insertions(+), 53 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 016f563fec39..0cbb4940f590 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -252,7 +252,12 @@ struct mt76_wcid {
struct ewma_signal rssi;
int inactive_count;

+ /* cached rate, updated from mac_sta_poll() and from TXS callback logic, in 7915
+ * at least.
+ */
struct rate_info rate;
+ bool rate_short_gi; /* cached HT/VHT short_gi, from mac_sta_poll() */
+ u8 rate_he_gi; /* cached HE GI, from mac_sta_poll() */

struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index b568d6baa768..6f92e207680f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -205,12 +205,17 @@ static void mt7915_mac_sta_poll(struct mt7915_dev *dev)
u8 offs = 24 + 2 * bw;

rate->he_gi = (val & (0x3 << offs)) >> offs;
+ msta->wcid.rate_he_gi = rate->he_gi; /* cache for later */
} else if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) {
- if (val & BIT(12 + bw))
+ if (val & BIT(12 + bw)) {
rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
- else
+ msta->wcid.rate_short_gi = 1;
+ } else {
rate->flags &= ~RATE_INFO_FLAGS_SHORT_GI;
+ msta->wcid.rate_short_gi = 0;
+ }
}
+ /* TODO: Deal with HT_MCS */
}

rcu_read_unlock();
@@ -1302,30 +1307,19 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
}
}

-static bool
-mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
- __le32 *txs_data, struct mt7915_sta_stats *stats)
+static void
+mt7915_mac_parse_txs(struct mt7915_dev *dev, struct mt76_wcid *wcid,
+ __le32 *txs_data, struct mt7915_sta_stats *stats,
+ struct rate_info *rate,
+ struct ieee80211_tx_info *info)
{
struct ieee80211_supported_band *sband;
- struct mt76_dev *mdev = &dev->mt76;
struct mt76_phy *mphy;
- struct ieee80211_tx_info *info;
- struct sk_buff_head list;
- struct rate_info rate = {};
- struct sk_buff *skb;
- bool cck = false;
u32 txrate, txs, mode;
-
- mt76_tx_status_lock(mdev, &list);
- skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
-
- /* TODO: Gather stats anyway, even if we are not matching on an skb. */
- if (!skb)
- goto out_no_skb;
+ bool cck = false;

txs = le32_to_cpu(txs_data[0]);

- info = IEEE80211_SKB_CB(skb);
if (!(txs & MT_TXS0_ACK_ERROR_MASK))
info->flags |= IEEE80211_TX_STAT_ACK;

@@ -1337,18 +1331,18 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

txrate = FIELD_GET(MT_TXS0_TX_RATE, txs);

- rate.mcs = FIELD_GET(MT_TX_RATE_IDX, txrate);
- rate.nss = FIELD_GET(MT_TX_RATE_NSS, txrate) + 1;
+ rate->mcs = FIELD_GET(MT_TX_RATE_IDX, txrate);
+ rate->nss = FIELD_GET(MT_TX_RATE_NSS, txrate) + 1;

- stats->tx_nss[rate.nss - 1]++;
+ stats->tx_nss[rate->nss - 1]++;
/* It appears that rate.mcs even for HT may be small, considering in HT
* code below it is multiplied... but not certain on that,
* so code safely.
*/
- if (rate.mcs >= ARRAY_SIZE(stats->tx_mcs))
+ if (rate->mcs >= ARRAY_SIZE(stats->tx_mcs))
stats->tx_mcs[ARRAY_SIZE(stats->tx_mcs) - 1]++;
else
- stats->tx_mcs[rate.mcs]++;
+ stats->tx_mcs[rate->mcs]++;

mode = FIELD_GET(MT_TX_RATE_MODE, txrate);
switch (mode) {
@@ -1365,73 +1359,95 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
else
sband = &mphy->sband_2g.sband;

- rate.mcs = mt76_get_rate(mphy->dev, sband, rate.mcs, cck);
- rate.legacy = sband->bitrates[rate.mcs].bitrate;
+ rate->mcs = mt76_get_rate(mphy->dev, sband, rate->mcs, cck);
+ rate->legacy = sband->bitrates[rate->mcs].bitrate;
break;
case MT_PHY_TYPE_HT:
case MT_PHY_TYPE_HT_GF:
- rate.mcs += (rate.nss - 1) * 8;
- if (rate.mcs > 31)
- goto out;
+ rate->mcs += (rate->nss - 1) * 8;
+ if (rate->mcs > 31)
+ break;

- rate.flags = RATE_INFO_FLAGS_MCS;
- if (wcid->rate.flags & RATE_INFO_FLAGS_SHORT_GI)
- rate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+ rate->flags = RATE_INFO_FLAGS_MCS;
+ if (wcid->rate_short_gi)
+ rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
break;
case MT_PHY_TYPE_VHT:
- if (rate.mcs > 9)
- goto out;
+ if (rate->mcs > 9)
+ break;

- rate.flags = RATE_INFO_FLAGS_VHT_MCS;
+ rate->flags = RATE_INFO_FLAGS_VHT_MCS;
+ if (wcid->rate_short_gi)
+ rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
break;
case MT_PHY_TYPE_HE_SU:
case MT_PHY_TYPE_HE_EXT_SU:
case MT_PHY_TYPE_HE_TB:
case MT_PHY_TYPE_HE_MU:
- if (rate.mcs > 11)
- goto out;
+ if (rate->mcs > 11)
+ break;

- rate.he_gi = wcid->rate.he_gi;
- rate.he_dcm = FIELD_GET(MT_TX_RATE_DCM, txrate);
- rate.flags = RATE_INFO_FLAGS_HE_MCS;
+ rate->he_gi = wcid->rate_he_gi;
+ rate->he_dcm = FIELD_GET(MT_TX_RATE_DCM, txrate);
+ rate->flags = RATE_INFO_FLAGS_HE_MCS;
break;
default:
- goto out;
+ WARN_ON_ONCE(true);
+ mode = 0;
}

stats->tx_mode[mode]++;

switch (FIELD_GET(MT_TXS0_BW, txs)) {
case IEEE80211_STA_RX_BW_160:
- rate.bw = RATE_INFO_BW_160;
+ rate->bw = RATE_INFO_BW_160;
stats->tx_bw[3]++;
break;
case IEEE80211_STA_RX_BW_80:
- rate.bw = RATE_INFO_BW_80;
+ rate->bw = RATE_INFO_BW_80;
stats->tx_bw[2]++;
break;
case IEEE80211_STA_RX_BW_40:
- rate.bw = RATE_INFO_BW_40;
+ rate->bw = RATE_INFO_BW_40;
stats->tx_bw[1]++;
break;
default:
- rate.bw = RATE_INFO_BW_20;
+ rate->bw = RATE_INFO_BW_20;
stats->tx_bw[0]++;
break;
}
+}

- /* Cache rate for packets that don't get a TXS callback for some
- * reason.
+static void
+mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
+ __le32 *txs_data, struct mt7915_sta_stats *stats)
+{
+ struct mt76_dev *mdev = &dev->mt76;
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_info info_stack;
+ struct sk_buff_head list;
+ /* rate is cached in wcid->rate for skbs that do not request to be
+ * paired with TXS data. This is normal datapath.
*/
- wcid->rate = rate;
+ struct rate_info *rate = &wcid->rate;
+ struct sk_buff *skb;

-out:
- mt76_tx_status_skb_done(mdev, skb, &list, wcid);
+ mt76_tx_status_lock(mdev, &list);
+ skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);

-out_no_skb:
- mt76_tx_status_unlock(mdev, &list);
+ memset(rate, 0, sizeof(*rate));

- return !!skb;
+ if (skb)
+ info = IEEE80211_SKB_CB(skb);
+ else
+ info = &info_stack;
+
+ mt7915_mac_parse_txs(dev, wcid, txs_data, stats, rate, info);
+
+ if (skb)
+ mt76_tx_status_skb_done(mdev, skb, &list, wcid);
+
+ mt76_tx_status_unlock(mdev, &list);
}

static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data)
--
2.20.1

2021-08-13 16:53:15

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list


On 2021-08-04 15:44, [email protected] wrote:
> From: Ben Greear <[email protected]>
>
> This improves performance when sending lots of frames that
> are requesting being mapped to a TXS callback.
>
> Add comments to help next person understood the tx path
> better.
>
> Signed-off-by: Ben Greear <[email protected]>
> ---
>
> v5: Rebased on top of previous series.
>
> drivers/net/wireless/mediatek/mt76/mt76.h | 48 +++++++---
> .../net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
> .../net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
> .../net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +-
> .../net/wireless/mediatek/mt76/mt7915/mac.c | 8 +-
> .../net/wireless/mediatek/mt76/mt7921/mac.c | 9 +-
> drivers/net/wireless/mediatek/mt76/tx.c | 90 ++++++++++++++++---
> 7 files changed, 132 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 436bf2b8e2cd..016f563fec39 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);
> #define MT_WCID_TX_INFO_TXPWR_ADJ GENMASK(25, 18)
> #define MT_WCID_TX_INFO_SET BIT(31)
>
> +#define MT_PACKET_ID_MASK GENMASK(6, 0)
> +#define MT_PACKET_ID_NO_ACK 0
> +/* Request TXS, but don't try to match with skb. */
> +#define MT_PACKET_ID_NO_SKB 1
> +#define MT_PACKET_ID_FIRST 2
> +#define MT_PACKET_ID_HAS_RATE BIT(7)
> +#define MT_PACKET_ID_MAX (GENMASK(7, 0) - 1)
> +
> struct mt76_wcid {
> struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];
>
> @@ -246,6 +254,8 @@ struct mt76_wcid {
>
> struct rate_info rate;
>
> + struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
You could add this to reduce the struct size:
#define MT_NUM_STATUS_PACKETS \
(MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)

And then subtract MT_PACKET_ID_FIRST from cache entries.


> u16 idx;
> u8 hw_key_idx;
> u8 hw_key_idx2;
> @@ -302,13 +312,8 @@ struct mt76_rx_tid {
> #define MT_TX_CB_TXS_DONE BIT(1)
> #define MT_TX_CB_TXS_FAILED BIT(2)
>
> -#define MT_PACKET_ID_MASK GENMASK(6, 0)
> -#define MT_PACKET_ID_NO_ACK 0
> -#define MT_PACKET_ID_NO_SKB 1
> -#define MT_PACKET_ID_FIRST 2
> -#define MT_PACKET_ID_HAS_RATE BIT(7)
> -
> -#define MT_TX_STATUS_SKB_TIMEOUT HZ
> +/* This is timer for when to give up when waiting for TXS callback. */
> +#define MT_TX_STATUS_SKB_TIMEOUT (HZ / 8)
I think the way timeouts are checked now, HZ/8 is way too short.
I would recommend checking timeout only for packets where
MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within
__mt76_tx_status_skb_done on DMA completion. That should make it
possible to keep the timeout short without running into it in cases
where significant congestion adds huge completion latency.

> @@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)
> }
>
> static inline int
> -mt76_get_next_pkt_id(struct mt76_wcid *wcid)
> +mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,
> + struct sk_buff *skb)
> {
> + struct sk_buff *qskb;
> +
> + lockdep_assert_held(&dev->status_list.lock);
> +
> wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
> - if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
> - wcid->packet_id == MT_PACKET_ID_NO_SKB)
> + if (wcid->packet_id < MT_PACKET_ID_FIRST)
> wcid->packet_id = MT_PACKET_ID_FIRST;
>
> + qskb = wcid->skb_status_array[wcid->packet_id];
> + if (qskb) {
> + /* bummer, already waiting on this pid. See if it is stale. */
> + struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);
> +
> + if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {
> + /* ok, not stale. Increment pid anyway, will try next
> + * slot next time
> + */
> + return MT_PACKET_ID_NO_SKB;
> + }
> + }
> +
> + /* cache this skb for fast lookup by packet-id */
> + wcid->skb_status_array[wcid->packet_id] = skb;
> +
I think mt76_get_next_pkt_id is not a good place for caching the skb.
Better cache it in the same place that also puts the skb in the status
list: mt76_tx_status_skb_add

That way you can drop your (possibly broken) changes to mt7921, which
calls mt76_get_next_pkt_id directly, but does not support tx status
tracking for skbs.

> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> index d9f52e2611a7..8f5702981900 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>
> mt76_tx_status_lock(mdev, &list);
> skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
> +
> + /* TODO: Gather stats anyway, even if we are not matching on an skb. */
Please drop this comment, since you're deleting it in another patch in this
series anyway.

> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
> stats->tx_bw[0]++;
> break;
> }
> +
> + /* Cache rate for packets that don't get a TXS callback for some
> + * reason.
> + */
> wcid->rate = rate;
That comment is wrong, wcid->rate is cached because HE rates can't be
reported via skb->cb due to lack of space.


> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
> index 6f302acb6e69..4c8504d3c904 100644
> --- a/drivers/net/wireless/mediatek/mt76/tx.c
> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
> IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
> return MT_PACKET_ID_NO_SKB;
>
> + /* due to limited range of the pktid (7 bits), we can only
> + * have a limited number of outstanding frames. I think it is OK to
> + * check the length outside of a lock since it doesn't matter too much
> + * if we read wrong data here.
> + * The TX-status callbacks don't always return a callback for an SKB,
> + * so the status_list may contain some stale skbs. Those will be cleaned
> + * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
> + */
> +
> + qlen = skb_queue_len(&dev->status_list);
> + if (qlen > 120)
> + return MT_PACKET_ID_NO_SKB;
Checking the length of the per-device status list doesn't make sense,
since pktid allocation is per-wcid.

- Felix

2021-08-13 17:14:33

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v5 04/11] mt76: mt7915: allow processing TXS for 'NO_SKB' pkt-ids

On 2021-08-04 15:44, [email protected] wrote:
> From: Ben Greear <[email protected]>
>
> This will let us update stats and wcid.rate for every TXS
> callback we receive for a particular wcid.
>
> For now, the TXS is not requested for NO_SKB frames, however.
> That will be allowed in next patch.
>
> Signed-off-by: Ben Greear <[email protected]>
> ---
> .../net/wireless/mediatek/mt76/mt7915/mac.c | 18 +++++++++++++-----
> 1 file changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> index 6f92e207680f..2228dad71657 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> @@ -1047,6 +1047,7 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
> txwi[4] = 0;
>
> val = FIELD_PREP(MT_TXD5_PID, pid);
> + /* NOTE: mt7916 does NOT request TXS for 'NO_SKB' frames by default. */
I think this comment can be dropped, given that you're adding extra
checks in the next patch that clarify this.
> if (pid >= MT_PACKET_ID_FIRST)
> val |= MT_TXD5_TX_STATUS_HOST;
> txwi[5] = cpu_to_le32(val);

- Felix

2021-08-13 17:29:22

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list

On 8/13/21 9:50 AM, Felix Fietkau wrote:
>
> On 2021-08-04 15:44, [email protected] wrote:
>> From: Ben Greear <[email protected]>
>>
>> This improves performance when sending lots of frames that
>> are requesting being mapped to a TXS callback.
>>
>> Add comments to help next person understood the tx path
>> better.
>>
>> Signed-off-by: Ben Greear <[email protected]>
>> ---
>>
>> v5: Rebased on top of previous series.
>>
>> drivers/net/wireless/mediatek/mt76/mt76.h | 48 +++++++---
>> .../net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
>> .../net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
>> .../net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +-
>> .../net/wireless/mediatek/mt76/mt7915/mac.c | 8 +-
>> .../net/wireless/mediatek/mt76/mt7921/mac.c | 9 +-
>> drivers/net/wireless/mediatek/mt76/tx.c | 90 ++++++++++++++++---
>> 7 files changed, 132 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>> index 436bf2b8e2cd..016f563fec39 100644
>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);
>> #define MT_WCID_TX_INFO_TXPWR_ADJ GENMASK(25, 18)
>> #define MT_WCID_TX_INFO_SET BIT(31)
>>
>> +#define MT_PACKET_ID_MASK GENMASK(6, 0)
>> +#define MT_PACKET_ID_NO_ACK 0
>> +/* Request TXS, but don't try to match with skb. */
>> +#define MT_PACKET_ID_NO_SKB 1
>> +#define MT_PACKET_ID_FIRST 2
>> +#define MT_PACKET_ID_HAS_RATE BIT(7)
>> +#define MT_PACKET_ID_MAX (GENMASK(7, 0) - 1)
>> +
>> struct mt76_wcid {
>> struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];
>>
>> @@ -246,6 +254,8 @@ struct mt76_wcid {
>>
>> struct rate_info rate;
>>
>> + struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
> You could add this to reduce the struct size:
> #define MT_NUM_STATUS_PACKETS \
> (MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)
>
> And then subtract MT_PACKET_ID_FIRST from cache entries.

That saves only two pointer-sized (void *) array entries of memory, and
complicates the code a bit?
I can do the change, it just doesn't seem worthwhile to me.

>> u16 idx;
>> u8 hw_key_idx;
>> u8 hw_key_idx2;
>> @@ -302,13 +312,8 @@ struct mt76_rx_tid {
>> #define MT_TX_CB_TXS_DONE BIT(1)
>> #define MT_TX_CB_TXS_FAILED BIT(2)
>>
>> -#define MT_PACKET_ID_MASK GENMASK(6, 0)
>> -#define MT_PACKET_ID_NO_ACK 0
>> -#define MT_PACKET_ID_NO_SKB 1
>> -#define MT_PACKET_ID_FIRST 2
>> -#define MT_PACKET_ID_HAS_RATE BIT(7)
>> -
>> -#define MT_TX_STATUS_SKB_TIMEOUT HZ
>> +/* This is timer for when to give up when waiting for TXS callback. */
>> +#define MT_TX_STATUS_SKB_TIMEOUT (HZ / 8)
> I think the way timeouts are checked now, HZ/8 is way too short.
> I would recommend checking timeout only for packets where
> MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within
> __mt76_tx_status_skb_done on DMA completion. That should make it
> possible to keep the timeout short without running into it in cases
> where significant congestion adds huge completion latency.

Ok, I like that idea. What is reasonable timeout from time of DMA done
before we give up on TXS callback?

>
>> @@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)
>> }
>>
>> static inline int
>> -mt76_get_next_pkt_id(struct mt76_wcid *wcid)
>> +mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,
>> + struct sk_buff *skb)
>> {
>> + struct sk_buff *qskb;
>> +
>> + lockdep_assert_held(&dev->status_list.lock);
>> +
>> wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
>> - if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
>> - wcid->packet_id == MT_PACKET_ID_NO_SKB)
>> + if (wcid->packet_id < MT_PACKET_ID_FIRST)
>> wcid->packet_id = MT_PACKET_ID_FIRST;
>>
>> + qskb = wcid->skb_status_array[wcid->packet_id];
>> + if (qskb) {
>> + /* bummer, already waiting on this pid. See if it is stale. */
>> + struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);
>> +
>> + if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {
>> + /* ok, not stale. Increment pid anyway, will try next
>> + * slot next time
>> + */
>> + return MT_PACKET_ID_NO_SKB;
>> + }
>> + }
>> +
>> + /* cache this skb for fast lookup by packet-id */
>> + wcid->skb_status_array[wcid->packet_id] = skb;
>> +
> I think mt76_get_next_pkt_id is not a good place for caching the skb.
> Better cache it in the same place that also puts the skb in the status
> list: mt76_tx_status_skb_add
>
> That way you can drop your (possibly broken) changes to mt7921, which
> calls mt76_get_next_pkt_id directly, but does not support tx status
> tracking for skbs.

Ok, I will try that.

>
>> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>> index d9f52e2611a7..8f5702981900 100644
>> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>>
>> mt76_tx_status_lock(mdev, &list);
>> skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
>> +
>> + /* TODO: Gather stats anyway, even if we are not matching on an skb. */
> Please drop this comment, since you're deleting in another patch in this
> series anyway.
>
>> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>> stats->tx_bw[0]++;
>> break;
>> }
>> +
>> + /* Cache rate for packets that don't get a TXS callback for some
>> + * reason.
>> + */
>> wcid->rate = rate;
> That comment is wrong, wcid->rate is cached because HE rates can't be
> reported via skb->cb due to lack of space.

We can update the rate from txs callback, and from txfree path,
and also from querying the firmware rate-ctrl registers (I think?).
TXS is disabled for most frames by default. txfree gives only some
info, not enough. And polling rate-ctrl registers is slow.

So I think the comment is OK, but I end up modifying the code later anyway,
so I can remove this comment if you prefer.

>
>
>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
>> index 6f302acb6e69..4c8504d3c904 100644
>> --- a/drivers/net/wireless/mediatek/mt76/tx.c
>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
>> IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
>> return MT_PACKET_ID_NO_SKB;
>>
>> + /* due to limited range of the pktid (7 bits), we can only
>> + * have a limited number of outstanding frames. I think it is OK to
>> + * check the length outside of a lock since it doesn't matter too much
>> + * if we read wrong data here.
>> + * The TX-status callbacks don't always return a callback for an SKB,
>> + * so the status_list may contain some stale skbs. Those will be cleaned
>> + * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
>> + */
>> +
>> + qlen = skb_queue_len(&dev->status_list);
>> + if (qlen > 120)
>> + return MT_PACKET_ID_NO_SKB;
> Checking the length of the per-device status list doesn't make sense,
> since pktid allocation is per-wcid.

Ok, so just remove this code, or should I set some other higher
limit to bound the list?

Thanks,
Ben

>
> - Felix
>


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com

2021-08-13 17:47:05

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list

On 2021-08-13 19:28, Ben Greear wrote:
> On 8/13/21 9:50 AM, Felix Fietkau wrote:
>>
>> On 2021-08-04 15:44, [email protected] wrote:
>>> From: Ben Greear <[email protected]>
>>>
>>> This improves performance when sending lots of frames that
>>> are requesting being mapped to a TXS callback.
>>>
>>> Add comments to help next person understood the tx path
>>> better.
>>>
>>> Signed-off-by: Ben Greear <[email protected]>
>>> ---
>>>
>>> v5: Rebased on top of previous series.
>>>
>>> drivers/net/wireless/mediatek/mt76/mt76.h | 48 +++++++---
>>> .../net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
>>> .../net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
>>> .../net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +-
>>> .../net/wireless/mediatek/mt76/mt7915/mac.c | 8 +-
>>> .../net/wireless/mediatek/mt76/mt7921/mac.c | 9 +-
>>> drivers/net/wireless/mediatek/mt76/tx.c | 90 ++++++++++++++++---
>>> 7 files changed, 132 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> index 436bf2b8e2cd..016f563fec39 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);
>>> #define MT_WCID_TX_INFO_TXPWR_ADJ GENMASK(25, 18)
>>> #define MT_WCID_TX_INFO_SET BIT(31)
>>>
>>> +#define MT_PACKET_ID_MASK GENMASK(6, 0)
>>> +#define MT_PACKET_ID_NO_ACK 0
>>> +/* Request TXS, but don't try to match with skb. */
>>> +#define MT_PACKET_ID_NO_SKB 1
>>> +#define MT_PACKET_ID_FIRST 2
>>> +#define MT_PACKET_ID_HAS_RATE BIT(7)
>>> +#define MT_PACKET_ID_MAX (GENMASK(7, 0) - 1)
>>> +
>>> struct mt76_wcid {
>>> struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];
>>>
>>> @@ -246,6 +254,8 @@ struct mt76_wcid {
>>>
>>> struct rate_info rate;
>>>
>>> + struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
>> You could add this to reduce the struct size:
>> #define MT_NUM_STATUS_PACKETS \
>> (MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)
>>
>> And then subtract MT_PACKET_ID_FIRST from cache entries.
>
> That saves two void* bytes of memory, and complicates the code a bit?
> I can do the change, just doesn't seem worthwhile to me.
It's not much more complicated (simple subtraction in very few places),
and the memory saved is per station.

>>> u16 idx;
>>> u8 hw_key_idx;
>>> u8 hw_key_idx2;
>>> @@ -302,13 +312,8 @@ struct mt76_rx_tid {
>>> #define MT_TX_CB_TXS_DONE BIT(1)
>>> #define MT_TX_CB_TXS_FAILED BIT(2)
>>>
>>> -#define MT_PACKET_ID_MASK GENMASK(6, 0)
>>> -#define MT_PACKET_ID_NO_ACK 0
>>> -#define MT_PACKET_ID_NO_SKB 1
>>> -#define MT_PACKET_ID_FIRST 2
>>> -#define MT_PACKET_ID_HAS_RATE BIT(7)
>>> -
>>> -#define MT_TX_STATUS_SKB_TIMEOUT HZ
>>> +/* This is timer for when to give up when waiting for TXS callback. */
>>> +#define MT_TX_STATUS_SKB_TIMEOUT (HZ / 8)
>> I think the way timeouts are checked now, HZ/8 is way too short.
>> I would recommend checking timeout only for packets where
>> MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within
>> __mt76_tx_status_skb_done on DMA completion. That should make it
>> possible to keep the timeout short without running into it in cases
>> where significant congestion adds huge completion latency.
>
> Ok, I like that idea. What is reasonable timeout from time of DMA done
> before we give up on TXS callback?
Your value of HZ / 8 seems reasonable to me for that case.

>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>>> index d9f52e2611a7..8f5702981900 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
>>> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>>>
>>> mt76_tx_status_lock(mdev, &list);
>>> skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
>>> +
>>> + /* TODO: Gather stats anyway, even if we are not matching on an skb. */
>> Please drop this comment, since you're deleting in another patch in this
>> series anyway.
>>
>>> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>>> stats->tx_bw[0]++;
>>> break;
>>> }
>>> +
>>> + /* Cache rate for packets that don't get a TXS callback for some
>>> + * reason.
>>> + */
>>> wcid->rate = rate;
>> That comment is wrong, wcid->rate is cached because HE rates can't be
>> reported via skb->cb due to lack of space.
>
> We can update the rate from txs callback, and from txfree path,
> and also from querying the firmware rate-ctrl registers (I think?).
> TXS is disabled for most frames by default. txfree gives only some
> info, not enough. And polling rate-ctrl registers is slow.
>
> So I think the comment is OK, but I end up modifying the code later anyway,
> so I can remove this comment if you prefer.
Yes, please do that.

>>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
>>> index 6f302acb6e69..4c8504d3c904 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/tx.c
>>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
>>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
>>> IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
>>> return MT_PACKET_ID_NO_SKB;
>>>
>>> + /* due to limited range of the pktid (7 bits), we can only
>>> + * have a limited number of outstanding frames. I think it is OK to
>>> + * check the length outside of a lock since it doesn't matter too much
>>> + * if we read wrong data here.
>>> + * The TX-status callbacks don't always return a callback for an SKB,
>>> + * so the status_list may contain some stale skbs. Those will be cleaned
>>> + * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
>>> + */
>>> +
>>> + qlen = skb_queue_len(&dev->status_list);
>>> + if (qlen > 120)
>>> + return MT_PACKET_ID_NO_SKB;
>> Checking the length of the per-device status list doesn't make sense,
>> since pktid allocation is per-wcid.
>
> Ok, so just remove this code, or should I set some other higher
> limit to bound the list?
You could just check for a duplicate skb_status_array entry.

- Felix

2021-08-13 18:03:10

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list

On 8/13/21 10:46 AM, Felix Fietkau wrote:

>>>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
>>>> index 6f302acb6e69..4c8504d3c904 100644
>>>> --- a/drivers/net/wireless/mediatek/mt76/tx.c
>>>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
>>>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
>>>> IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
>>>> return MT_PACKET_ID_NO_SKB;
>>>>
>>>> + /* due to limited range of the pktid (7 bits), we can only
>>>> + * have a limited number of outstanding frames. I think it is OK to
>>>> + * check the length outside of a lock since it doesn't matter too much
>>>> + * if we read wrong data here.
>>>> + * The TX-status callbacks don't always return a callback for an SKB,
>>>> + * so the status_list may contain some stale skbs. Those will be cleaned
>>>> + * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
>>>> + */
>>>> +
>>>> + qlen = skb_queue_len(&dev->status_list);
>>>> + if (qlen > 120)
>>>> + return MT_PACKET_ID_NO_SKB;
>>> Checking the length of the per-device status list doesn't make sense,
>>> since pktid allocation is per-wcid.
>>
>> Ok, so just remove this code, or should I set some other higher
>> limit to bound the list?
> You could just check for a duplicate skb_status_array entry.

Ok, that will happen anyway when searching for next wcid pkt-id.

The check above was a quick bail-out before locks were acquired.

I'll just remove that qlen check...

Thanks,
Ben

>
> - Felix
>


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com

2021-08-19 15:16:17

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH v5 07/11] mt76: mt7915: add support for tx-overrides

[email protected] writes:

> From: Ben Greear <[email protected]>
>
> Allow setting fix rate on transmit without using full testmode
> logic.

Why?

> txpower, dynbw, retry count is not currently supported.
> And, probably later need additional logic to not apply this
> txo to non-data frames and to smaller frames, to allow
> ARP and such to go through while also forcing test data frames
> to arbitrary tx-rates (rates which very well may not be
> received by peer.)

Please include an example how to use the debugfs file.

IIRC there was a similar rtw88 patch adding a debugfs file to force setting
the tx rate. So what's the consensus: are we going to allow each driver
to have its own custom interface for setting tx rates? In my opinion this
should go via a generic nl80211 command, but if people think having
custom tx rate interfaces is ok I guess I need to reconsider.

As this patch needs more discussion, please separate it from the rest of the
series so that it can be applied separately.

--
https://patchwork.kernel.org/project/linux-wireless/list/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

2021-08-19 16:21:16

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH v5 07/11] mt76: mt7915: add support for tx-overrides

On 8/19/21 8:08 AM, Kalle Valo wrote:
> [email protected] writes:
>
>> From: Ben Greear <[email protected]>
>>
>> Allow setting a fixed rate on transmit without using the full testmode
>> logic.
>
> Why?

I use it for testing purposes, so I can send specific frame encodings
to see if peer can receive it or not (receiver sensitivity testing).

Others may be interested in a similar API to test that their system transmits
specific MCS frames at the proper txpower, or for other regulatory-related testing.

And to some degree, the underlying logic gives clever people a way to do driver-defined
per-skb rate control instead of depending on the firmware. I hacked a similar thing
into my ath10k, and at least one person attempted that, as I recall.

>
>> txpower, dynbw, and retry count are not currently supported.
>> And, probably later need additional logic to not apply this
>> txo to non-data frames and to smaller frames, to allow
>> ARP and such to go through while also forcing test data frames
>> to arbitrary tx-rates (rates which very well may not be
>> received by peer.)
>
> Please include an example how to use the debugfs file.

It is in the patch:

+ const char buf[] =
+ "This allows specify specif tx rate parameters for all DATA"
+ " frames on a vdev\n"
+ "To set a value, you specify the dev-name and key-value pairs:\n"
+ "tpc=10 sgi=1 mcs=x nss=x pream=x retries=x dynbw=0|1 bw=x enable=0|1\n"
+ "pream: 0=cck, 1=ofdm, 2=HT, 3=VHT, 4=HE_SU\n"
+ "cck-mcs: 0=1Mbps, 1=2Mbps, 3=5.5Mbps, 3=11Mbps\n"
+ "ofdm-mcs: 0=6Mbps, 1=9Mbps, 2=12Mbps, 3=18Mbps, 4=24Mbps, 5=36Mbps,"
+ " 6=48Mbps, 7=54Mbps\n"
+ "tpc is not implemented currently, bw is 0-3 for 20-160\n"
+ " For example, wlan0:\n"
+ "echo \"wlan0 tpc=255 sgi=1 mcs=0 nss=1 pream=3 retries=1 dynbw=0 bw=0"
+ " active=1\" > ...mt76/set_rate_override\n";

>
> IIRC there was a similar rtw88 patch adding a debugfs file to force setting
> the tx rate. So what's the consensus: are we going to allow each driver
> to have its own custom interface for setting tx rates? In my opinion this
> should go via a generic nl80211 command, but if people think having
> custom tx rate interfaces is ok I guess I need to reconsider.
>
> As this patch needs more discussion, please separate it from rest of the
> series so that it can be applied separately.

Ok, will do.

Thanks,
Ben


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com