2019-03-16 20:42:48

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 1/6] mt76: use mac80211 txq scheduling

Performance improvement and preparation for adding airtime fairness support

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 6 +-
drivers/net/wireless/mediatek/mt76/mt76.h | 3 +-
drivers/net/wireless/mediatek/mt76/tx.c | 98 ++++++++++-------------
drivers/net/wireless/mediatek/mt76/usb.c | 3 +-
4 files changed, 50 insertions(+), 60 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 7b8a998103d7..09978757e7d1 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -184,9 +184,7 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
last = ioread32(&q->regs->dma_idx);
}

- if (!flush)
- mt76_txq_schedule(dev, sq);
- else
+ if (flush)
mt76_dma_sync_idx(dev, q);

wake = wake && q->stopped &&
@@ -199,6 +197,8 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)

spin_unlock_bh(&q->lock);

+ if (!flush)
+ mt76_txq_schedule(dev, qid);
if (wake)
ieee80211_wake_queue(dev->hw, qid);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index edff44f32c8e..5d44f721d184 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -224,7 +224,6 @@ struct mt76_wcid {
};

struct mt76_txq {
- struct list_head list;
struct mt76_sw_queue *swq;
struct mt76_wcid *wcid;

@@ -683,7 +682,7 @@ void mt76_txq_remove(struct mt76_dev *dev, struct ieee80211_txq *txq);
void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
void mt76_stop_tx_queues(struct mt76_dev *dev, struct ieee80211_sta *sta,
bool send_bar);
-void mt76_txq_schedule(struct mt76_dev *dev, struct mt76_sw_queue *sq);
+void mt76_txq_schedule(struct mt76_dev *dev, enum mt76_txq_id qid);
void mt76_txq_schedule_all(struct mt76_dev *dev);
void mt76_release_buffered_frames(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 2c82db0b5834..ef9a0bbd64c1 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -479,23 +479,37 @@ mt76_txq_send_burst(struct mt76_dev *dev, struct mt76_sw_queue *sq,
}

static int
-mt76_txq_schedule_list(struct mt76_dev *dev, struct mt76_sw_queue *sq)
+mt76_txq_schedule_list(struct mt76_dev *dev, enum mt76_txq_id qid)
{
+ struct mt76_sw_queue *sq = &dev->q_tx[qid];
struct mt76_queue *hwq = sq->q;
- struct mt76_txq *mtxq, *mtxq_last;
- int len = 0;
+ struct ieee80211_txq *txq;
+ struct mt76_txq *mtxq;
+ struct mt76_wcid *wcid;
+ int ret = 0;

-restart:
- mtxq_last = list_last_entry(&sq->swq, struct mt76_txq, list);
- while (!list_empty(&sq->swq)) {
+ spin_lock_bh(&hwq->lock);
+ while (1) {
bool empty = false;
- int cur;
+
+ if (sq->swq_queued >= 4)
+ break;

if (test_bit(MT76_OFFCHANNEL, &dev->state) ||
- test_bit(MT76_RESET, &dev->state))
- return -EBUSY;
+ test_bit(MT76_RESET, &dev->state)) {
+ ret = -EBUSY;
+ break;
+ }
+
+ txq = ieee80211_next_txq(dev->hw, qid);
+ if (!txq)
+ break;
+
+ mtxq = (struct mt76_txq *)txq->drv_priv;
+ wcid = mtxq->wcid;
+ if (wcid && test_bit(MT_WCID_FLAG_PS, &wcid->flags))
+ continue;

- mtxq = list_first_entry(&sq->swq, struct mt76_txq, list);
if (mtxq->send_bar && mtxq->aggr) {
struct ieee80211_txq *txq = mtxq_to_txq(mtxq);
struct ieee80211_sta *sta = txq->sta;
@@ -507,38 +521,36 @@ mt76_txq_schedule_list(struct mt76_dev *dev, struct mt76_sw_queue *sq)
spin_unlock_bh(&hwq->lock);
ieee80211_send_bar(vif, sta->addr, tid, agg_ssn);
spin_lock_bh(&hwq->lock);
- goto restart;
}

- list_del_init(&mtxq->list);
-
- cur = mt76_txq_send_burst(dev, sq, mtxq, &empty);
+ ret += mt76_txq_send_burst(dev, sq, mtxq, &empty);
if (!empty)
- list_add_tail(&mtxq->list, &sq->swq);
-
- if (cur < 0)
- return cur;
-
- len += cur;
-
- if (mtxq == mtxq_last)
- break;
+ ieee80211_return_txq(dev->hw, txq);
}
+ spin_unlock_bh(&hwq->lock);

- return len;
+ return ret;
}

-void mt76_txq_schedule(struct mt76_dev *dev, struct mt76_sw_queue *sq)
+void mt76_txq_schedule(struct mt76_dev *dev, enum mt76_txq_id qid)
{
+ struct mt76_sw_queue *sq = &dev->q_tx[qid];
int len;

+ if (qid >= 4)
+ return;
+
+ if (sq->swq_queued >= 4)
+ return;
+
rcu_read_lock();
- do {
- if (sq->swq_queued >= 4 || list_empty(&sq->swq))
- break;

- len = mt76_txq_schedule_list(dev, sq);
+ do {
+ ieee80211_txq_schedule_start(dev->hw, qid);
+ len = mt76_txq_schedule_list(dev, qid);
+ ieee80211_txq_schedule_end(dev->hw, qid);
} while (len > 0);
+
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(mt76_txq_schedule);
@@ -547,13 +559,8 @@ void mt76_txq_schedule_all(struct mt76_dev *dev)
{
int i;

- for (i = 0; i <= MT_TXQ_BK; i++) {
- struct mt76_queue *q = dev->q_tx[i].q;
-
- spin_lock_bh(&q->lock);
- mt76_txq_schedule(dev, &dev->q_tx[i]);
- spin_unlock_bh(&q->lock);
- }
+ for (i = 0; i <= MT_TXQ_BK; i++)
+ mt76_txq_schedule(dev, i);
}
EXPORT_SYMBOL_GPL(mt76_txq_schedule_all);

@@ -575,8 +582,6 @@ void mt76_stop_tx_queues(struct mt76_dev *dev, struct ieee80211_sta *sta,

spin_lock_bh(&hwq->lock);
mtxq->send_bar = mtxq->aggr && send_bar;
- if (!list_empty(&mtxq->list))
- list_del_init(&mtxq->list);
spin_unlock_bh(&hwq->lock);
}
}
@@ -585,24 +590,16 @@ EXPORT_SYMBOL_GPL(mt76_stop_tx_queues);
void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
{
struct mt76_dev *dev = hw->priv;
- struct mt76_txq *mtxq = (struct mt76_txq *)txq->drv_priv;
- struct mt76_sw_queue *sq = mtxq->swq;
- struct mt76_queue *hwq = sq->q;

if (!test_bit(MT76_STATE_RUNNING, &dev->state))
return;

- spin_lock_bh(&hwq->lock);
- if (list_empty(&mtxq->list))
- list_add_tail(&mtxq->list, &sq->swq);
- mt76_txq_schedule(dev, sq);
- spin_unlock_bh(&hwq->lock);
+ mt76_txq_schedule(dev, txq->ac);
}
EXPORT_SYMBOL_GPL(mt76_wake_tx_queue);

void mt76_txq_remove(struct mt76_dev *dev, struct ieee80211_txq *txq)
{
- struct mt76_queue *hwq;
struct mt76_txq *mtxq;
struct sk_buff *skb;

@@ -610,12 +607,6 @@ void mt76_txq_remove(struct mt76_dev *dev, struct ieee80211_txq *txq)
return;

mtxq = (struct mt76_txq *) txq->drv_priv;
- hwq = mtxq->swq->q;
-
- spin_lock_bh(&hwq->lock);
- if (!list_empty(&mtxq->list))
- list_del_init(&mtxq->list);
- spin_unlock_bh(&hwq->lock);

while ((skb = skb_dequeue(&mtxq->retry_q)) != NULL)
ieee80211_free_txskb(dev->hw, skb);
@@ -626,7 +617,6 @@ void mt76_txq_init(struct mt76_dev *dev, struct ieee80211_txq *txq)
{
struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv;

- INIT_LIST_HEAD(&mtxq->list);
skb_queue_head_init(&mtxq->retry_q);

mtxq->swq = &dev->q_tx[mt76_txq_get_qid(txq)];
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 27896a435d6c..2dbd8dfd62a0 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -656,7 +656,6 @@ static void mt76u_tx_tasklet(unsigned long data)
dev->drv->tx_complete_skb(dev, i, &entry);
spin_lock_bh(&q->lock);
}
- mt76_txq_schedule(dev, sq);

wake = q->stopped && q->queued < q->ndesc - 8;
if (wake)
@@ -667,6 +666,8 @@ static void mt76u_tx_tasklet(unsigned long data)

spin_unlock_bh(&q->lock);

+ mt76_txq_schedule(dev, i);
+
if (!test_and_set_bit(MT76_READING_STATS, &dev->state))
ieee80211_queue_delayed_work(dev->hw,
&dev->usb.stat_work,
--
2.17.0



2019-03-16 20:42:47

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 6/6] mt76: only schedule txqs from the tx tasklet

Reduces lock contention from the tx path and improves performance

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 2 --
drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 2 ++
drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 3 +++
drivers/net/wireless/mediatek/mt76/tx.c | 2 +-
4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 7d8f9b8a81b5..35010915697c 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -205,8 +205,6 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)

spin_unlock_bh(&q->lock);

- if (!flush)
- mt76_txq_schedule(dev, qid);
if (wake)
ieee80211_wake_queue(dev->hw, qid);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index f7e3566c96fd..27e2d9f90553 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -145,6 +145,8 @@ mt7603_tx_tasklet(unsigned long data)
for (i = MT_TXQ_MCU; i >= 0; i--)
mt76_queue_tx_cleanup(dev, i, false);

+ mt76_txq_schedule_all(&dev->mt76);
+
mt7603_irq_enable(dev, MT_INT_TX_DONE_ALL);
}

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 1e82b99d0789..7186a382c686 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -211,6 +211,9 @@ static void mt76x02_tx_tasklet(unsigned long data)
mt76_queue_tx_cleanup(dev, i, false);

mt76x02_mac_poll_tx_status(dev, false);
+
+ mt76_txq_schedule_all(&dev->mt76);
+
mt76x02_irq_enable(dev, MT_INT_TX_DONE_ALL);
}

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 534ad9fb8832..60bbb6561d8f 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -594,7 +594,7 @@ void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
if (!test_bit(MT76_STATE_RUNNING, &dev->state))
return;

- mt76_txq_schedule(dev, txq->ac);
+ tasklet_schedule(&dev->tx_tasklet);
}
EXPORT_SYMBOL_GPL(mt76_wake_tx_queue);

--
2.17.0


2019-03-16 20:42:47

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 5/6] mt76: move tx tasklet to struct mt76_dev

Allows it to be scheduled from core code

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 3 ++-
drivers/net/wireless/mediatek/mt76/mt7603/core.c | 2 +-
drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 4 ++--
drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 6 +++---
.../net/wireless/mediatek/mt76/mt7603/mt7603.h | 1 -
drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 2 +-
drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 15 ++++++++-------
drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 2 +-
drivers/net/wireless/mediatek/mt76/usb.c | 6 +++---
9 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 6bee65edb26a..54e1e50eb4d2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -396,7 +396,6 @@ struct mt76_usb {
u8 data[32];

struct tasklet_struct rx_tasklet;
- struct tasklet_struct tx_tasklet;
struct delayed_work stat_work;

u8 out_ep[__MT_EP_OUT_MAX];
@@ -458,6 +457,8 @@ struct mt76_dev {
const struct mt76_queue_ops *queue_ops;
int tx_dma_idx[4];

+ struct tasklet_struct tx_tasklet;
+
wait_queue_head_t tx_wait;
struct sk_buff_head status_list;

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/core.c b/drivers/net/wireless/mediatek/mt76/mt7603/core.c
index 4668c573f74a..0d06ff67ce44 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/core.c
@@ -35,7 +35,7 @@ irqreturn_t mt7603_irq_handler(int irq, void *dev_instance)

if (intr & MT_INT_TX_DONE_ALL) {
mt7603_irq_disable(dev, MT_INT_TX_DONE_ALL);
- tasklet_schedule(&dev->tx_tasklet);
+ tasklet_schedule(&dev->mt76.tx_tasklet);
}

if (intr & MT_INT_RX_DONE(0)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index 37cedfcedce4..f7e3566c96fd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -164,7 +164,7 @@ int mt7603_dma_init(struct mt7603_dev *dev)
init_waitqueue_head(&dev->mt76.mmio.mcu.wait);
skb_queue_head_init(&dev->mt76.mmio.mcu.res_q);

- tasklet_init(&dev->tx_tasklet, mt7603_tx_tasklet, (unsigned long)dev);
+ tasklet_init(&dev->mt76.tx_tasklet, mt7603_tx_tasklet, (unsigned long)dev);

mt76_clear(dev, MT_WPDMA_GLO_CFG,
MT_WPDMA_GLO_CFG_TX_DMA_EN |
@@ -224,6 +224,6 @@ void mt7603_dma_cleanup(struct mt7603_dev *dev)
MT_WPDMA_GLO_CFG_RX_DMA_EN |
MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);

- tasklet_kill(&dev->tx_tasklet);
+ tasklet_kill(&dev->mt76.tx_tasklet);
mt76_dma_cleanup(&dev->mt76);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index fb1961ac9dc6..2badfe527397 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1306,7 +1306,7 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev)
/* lock/unlock all queues to ensure that no tx is pending */
mt76_txq_schedule_all(&dev->mt76);

- tasklet_disable(&dev->tx_tasklet);
+ tasklet_disable(&dev->mt76.tx_tasklet);
tasklet_disable(&dev->pre_tbtt_tasklet);
napi_disable(&dev->mt76.napi[0]);
napi_disable(&dev->mt76.napi[1]);
@@ -1353,8 +1353,8 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev)
clear_bit(MT76_RESET, &dev->mt76.state);
mutex_unlock(&dev->mt76.mutex);

- tasklet_enable(&dev->tx_tasklet);
- tasklet_schedule(&dev->tx_tasklet);
+ tasklet_enable(&dev->mt76.tx_tasklet);
+ tasklet_schedule(&dev->mt76.tx_tasklet);

tasklet_enable(&dev->pre_tbtt_tasklet);
mt7603_beacon_set_timer(dev, -1, beacon_int);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
index 9cc8ca7a4d1c..3848a011105f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
@@ -145,7 +145,6 @@ struct mt7603_dev {
unsigned int reset_cause[__RESET_CAUSE_MAX];

struct delayed_work mac_work;
- struct tasklet_struct tx_tasklet;
struct tasklet_struct pre_tbtt_tasklet;
};

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
index 91718647da02..ff887454e51e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -326,7 +326,7 @@ static int __maybe_unused mt76x0_resume(struct usb_interface *usb_intf)
goto err;

tasklet_enable(&usb->rx_tasklet);
- tasklet_enable(&usb->tx_tasklet);
+ tasklet_enable(&dev->mt76.tx_tasklet);

ret = mt76x0u_init_hardware(dev);
if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 736a77936249..1e82b99d0789 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -229,7 +229,8 @@ int mt76x02_dma_init(struct mt76x02_dev *dev)
if (!status_fifo)
return -ENOMEM;

- tasklet_init(&dev->tx_tasklet, mt76x02_tx_tasklet, (unsigned long) dev);
+ tasklet_init(&dev->mt76.tx_tasklet, mt76x02_tx_tasklet,
+ (unsigned long) dev);
tasklet_init(&dev->pre_tbtt_tasklet, mt76x02_pre_tbtt_tasklet,
(unsigned long)dev);

@@ -299,7 +300,7 @@ irqreturn_t mt76x02_irq_handler(int irq, void *dev_instance)

if (intr & MT_INT_TX_DONE_ALL) {
mt76x02_irq_disable(dev, MT_INT_TX_DONE_ALL);
- tasklet_schedule(&dev->tx_tasklet);
+ tasklet_schedule(&dev->mt76.tx_tasklet);
}

if (intr & MT_INT_RX_DONE(0)) {
@@ -325,7 +326,7 @@ irqreturn_t mt76x02_irq_handler(int irq, void *dev_instance)

if (intr & MT_INT_TX_STAT) {
mt76x02_mac_poll_tx_status(dev, true);
- tasklet_schedule(&dev->tx_tasklet);
+ tasklet_schedule(&dev->mt76.tx_tasklet);
}

if (intr & MT_INT_GPTIMER) {
@@ -355,7 +356,7 @@ static void mt76x02_dma_enable(struct mt76x02_dev *dev)

void mt76x02_dma_cleanup(struct mt76x02_dev *dev)
{
- tasklet_kill(&dev->tx_tasklet);
+ tasklet_kill(&dev->mt76.tx_tasklet);
mt76_dma_cleanup(&dev->mt76);
}
EXPORT_SYMBOL_GPL(mt76x02_dma_cleanup);
@@ -476,7 +477,7 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
set_bit(MT76_RESET, &dev->mt76.state);

tasklet_disable(&dev->pre_tbtt_tasklet);
- tasklet_disable(&dev->tx_tasklet);
+ tasklet_disable(&dev->mt76.tx_tasklet);

for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++)
napi_disable(&dev->mt76.napi[i]);
@@ -529,8 +530,8 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)

clear_bit(MT76_RESET, &dev->mt76.state);

- tasklet_enable(&dev->tx_tasklet);
- tasklet_schedule(&dev->tx_tasklet);
+ tasklet_enable(&dev->mt76.tx_tasklet);
+ tasklet_schedule(&dev->mt76.tx_tasklet);

tasklet_enable(&dev->pre_tbtt_tasklet);

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index 7a5d539873ca..77575727df47 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -117,7 +117,7 @@ static int __maybe_unused mt76x2u_resume(struct usb_interface *intf)
goto err;

tasklet_enable(&usb->rx_tasklet);
- tasklet_enable(&usb->tx_tasklet);
+ tasklet_enable(&dev->mt76.tx_tasklet);

err = mt76x2u_init_hardware(dev);
if (err < 0)
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 2dbd8dfd62a0..ac03acdae279 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -713,7 +713,7 @@ static void mt76u_complete_tx(struct urb *urb)
dev_err(dev->dev, "tx urb failed: %d\n", urb->status);
buf->done = true;

- tasklet_schedule(&dev->usb.tx_tasklet);
+ tasklet_schedule(&dev->tx_tasklet);
}

static int
@@ -859,7 +859,7 @@ static void mt76u_stop_tx(struct mt76_dev *dev)
void mt76u_stop_queues(struct mt76_dev *dev)
{
tasklet_disable(&dev->usb.rx_tasklet);
- tasklet_disable(&dev->usb.tx_tasklet);
+ tasklet_disable(&dev->tx_tasklet);

mt76u_stop_rx(dev);
mt76u_stop_tx(dev);
@@ -914,7 +914,7 @@ int mt76u_init(struct mt76_dev *dev,
struct mt76_usb *usb = &dev->usb;

tasklet_init(&usb->rx_tasklet, mt76u_rx_tasklet, (unsigned long)dev);
- tasklet_init(&usb->tx_tasklet, mt76u_tx_tasklet, (unsigned long)dev);
+ tasklet_init(&dev->tx_tasklet, mt76u_tx_tasklet, (unsigned long)dev);
INIT_DELAYED_WORK(&usb->stat_work, mt76u_tx_status_data);
skb_queue_head_init(&dev->rx_skb[MT_RXQ_MAIN]);

--
2.17.0


2019-03-16 20:42:49

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 3/6] mt76: store wcid tx rate info in one u32 reduce locking

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 10 ++++---
.../net/wireless/mediatek/mt76/mt7603/mac.c | 4 +--
.../net/wireless/mediatek/mt76/mt76x02_mac.c | 26 ++++++++++++-------
.../net/wireless/mediatek/mt76/mt76x02_util.c | 1 -
drivers/net/wireless/mediatek/mt76/tx.c | 4 +--
5 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5d44f721d184..40b3ce01e74d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -196,6 +196,11 @@ enum mt76_wcid_flags {

DECLARE_EWMA(signal, 10, 8);

+#define MT_WCID_TX_INFO_RATE GENMASK(15, 0)
+#define MT_WCID_TX_INFO_NSS GENMASK(17, 16)
+#define MT_WCID_TX_INFO_TXPWR_ADJ GENMASK(25, 18)
+#define MT_WCID_TX_INFO_SET BIT(31)
+
struct mt76_wcid {
struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];

@@ -214,10 +219,7 @@ struct mt76_wcid {
u8 rx_check_pn;
u8 rx_key_pn[IEEE80211_NUM_TIDS][6];

- __le16 tx_rate;
- bool tx_rate_set;
- u8 tx_rate_nss;
- s8 max_txpwr_adj;
+ u32 tx_info;
bool sw_iv;

u8 packet_id;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 5f800467c628..d6e260ca1423 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -754,11 +754,11 @@ void mt7603_wtbl_set_rates(struct mt7603_dev *dev, struct mt7603_sta *sta,
MT_WTBL_UPDATE_RATE_UPDATE |
MT_WTBL_UPDATE_TX_COUNT_CLEAR);

- if (!sta->wcid.tx_rate_set)
+ if (!(sta->wcid.tx_info & MT_WCID_TX_INFO_SET))
mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);

sta->rate_count = 2 * MT7603_RATE_RETRY * n_rates;
- sta->wcid.tx_rate_set = true;
+ sta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
}

static enum mt7603_cipher_type
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index df6930a94f74..29dbe18abbc9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -218,10 +218,17 @@ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev,
void mt76x02_mac_wcid_set_rate(struct mt76x02_dev *dev, struct mt76_wcid *wcid,
const struct ieee80211_tx_rate *rate)
{
- spin_lock_bh(&dev->mt76.lock);
- wcid->tx_rate = mt76x02_mac_tx_rate_val(dev, rate, &wcid->tx_rate_nss);
- wcid->tx_rate_set = true;
- spin_unlock_bh(&dev->mt76.lock);
+ s8 max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate);
+ __le16 rateval;
+ u32 tx_info;
+ s8 nss;
+
+ rateval = mt76x02_mac_tx_rate_val(dev, rate, &nss);
+ tx_info = FIELD_PREP(MT_WCID_TX_INFO_RATE, rateval) |
+ FIELD_PREP(MT_WCID_TX_INFO_NSS, nss) |
+ FIELD_PREP(MT_WCID_TX_INFO_TXPWR_ADJ, max_txpwr_adj) |
+ MT_WCID_TX_INFO_SET;
+ wcid->tx_info = tx_info;
}

void mt76x02_mac_set_short_preamble(struct mt76x02_dev *dev, bool enable)
@@ -323,6 +330,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_tx_rate *rate = &info->control.rates[0];
struct ieee80211_key_conf *key = info->control.hw_key;
+ u32 wcid_tx_info;
u16 rate_ht_mask = FIELD_PREP(MT_RXWI_RATE_PHY, BIT(1) | BIT(2));
u16 txwi_flags = 0;
u8 nss;
@@ -357,16 +365,16 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
txwi->eiv = *((__le32 *)&ccmp_pn[4]);
}

- spin_lock_bh(&dev->mt76.lock);
if (wcid && (rate->idx < 0 || !rate->count)) {
- txwi->rate = wcid->tx_rate;
- max_txpwr_adj = wcid->max_txpwr_adj;
- nss = wcid->tx_rate_nss;
+ wcid_tx_info = wcid->tx_info;
+ txwi->rate = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info);
+ max_txpwr_adj = FIELD_GET(MT_WCID_TX_INFO_TXPWR_ADJ,
+ wcid_tx_info);
+ nss = FIELD_GET(MT_WCID_TX_INFO_NSS, wcid_tx_info);
} else {
txwi->rate = mt76x02_mac_tx_rate_val(dev, rate, &nss);
max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate);
}
- spin_unlock_bh(&dev->mt76.lock);

txpwr_adj = mt76x02_tx_get_txpwr_adj(dev, dev->mt76.txpower_conf,
max_txpwr_adj);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 81d65319d3ea..a6bb71a6ed0d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -562,7 +562,6 @@ void mt76x02_sta_rate_tbl_update(struct ieee80211_hw *hw,
rate.idx = rates->rate[0].idx;
rate.flags = rates->rate[0].flags;
mt76x02_mac_wcid_set_rate(dev, &msta->wcid, &rate);
- msta->wcid.max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, &rate);
}
EXPORT_SYMBOL_GPL(mt76x02_sta_rate_tbl_update);

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index ef9a0bbd64c1..534ad9fb8832 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -266,7 +266,7 @@ mt76_tx(struct mt76_dev *dev, struct ieee80211_sta *sta,
skb_set_queue_mapping(skb, qid);
}

- if (!wcid->tx_rate_set)
+ if (!(wcid->tx_info & MT_WCID_TX_INFO_SET))
ieee80211_get_tx_rates(info->control.vif, sta, skb,
info->control.rates, 1);

@@ -412,7 +412,7 @@ mt76_txq_send_burst(struct mt76_dev *dev, struct mt76_sw_queue *sq,
}

info = IEEE80211_SKB_CB(skb);
- if (!wcid->tx_rate_set)
+ if (!(wcid->tx_info & MT_WCID_TX_INFO_SET))
ieee80211_get_tx_rates(txq->vif, txq->sta, skb,
info->control.rates, 1);
tx_rate = info->control.rates[0];
--
2.17.0


2019-03-16 20:42:50

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 4/6] mt76: store software PN/IV in wcid

Avoids expensive 64-bit atomic access in the data path

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt76.h | 2 ++
drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 6 +++++-
drivers/net/wireless/mediatek/mt76/mt7603/main.c | 1 +
drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 7 ++++++-
drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 7 +++++--
drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 1 +
6 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 40b3ce01e74d..6bee65edb26a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -220,6 +220,8 @@ struct mt76_wcid {
u8 rx_key_pn[IEEE80211_NUM_TIDS][6];

u32 tx_info;
+
+ u64 tx_pn;
bool sw_iv;

u8 packet_id;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index d6e260ca1423..fb1961ac9dc6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -924,7 +924,11 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
txwi[3] = cpu_to_le32(val);

if (key) {
- u64 pn = atomic64_inc_return(&key->tx_pn);
+ u64 pn;
+
+ spin_lock(&dev->mt76.lock);
+ pn = ++wcid->tx_pn;
+ spin_unlock(&dev->mt76.lock);

txwi[3] |= cpu_to_le32(MT_TXD3_PN_VALID);
txwi[4] = cpu_to_le32(pn & GENMASK(31, 0));
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index 7849528db134..3754723190d5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -472,6 +472,7 @@ mt7603_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
if (cmd == SET_KEY) {
key->hw_key_idx = wcid->idx;
wcid->hw_key_idx = idx;
+ wcid->tx_pn = atomic64_read(&key->tx_pn);
} else {
if (idx == wcid->hw_key_idx)
wcid->hw_key_idx = -1;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 29dbe18abbc9..8e5f920deef1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -352,7 +352,12 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
txwi->wcid = 0xff;

if (wcid && wcid->sw_iv && key) {
- u64 pn = atomic64_inc_return(&key->tx_pn);
+ u64 pn;
+
+ spin_lock(&dev->mt76.lock);
+ pn = ++wcid->tx_pn;
+ spin_unlock(&dev->mt76.lock);
+
ccmp_pn[0] = pn;
ccmp_pn[1] = pn >> 8;
ccmp_pn[2] = 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index ec94d612f53c..736a77936249 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -420,10 +420,13 @@ static void mt76x02_key_sync(struct ieee80211_hw *hw, struct ieee80211_vif *vif,

wcid = (struct mt76_wcid *) sta->drv_priv;

- if (wcid->hw_key_idx != key->keyidx || wcid->sw_iv)
+ if (wcid->hw_key_idx != key->keyidx)
return;

- mt76x02_mac_wcid_sync_pn(dev, wcid->idx, key);
+ if (wcid->sw_iv)
+ atomic64_set(&key->tx_pn, wcid->tx_pn);
+ else
+ mt76x02_mac_wcid_sync_pn(dev, wcid->idx, key);
}

static void mt76x02_reset_state(struct mt76x02_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index a6bb71a6ed0d..079ac265ef26 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -433,6 +433,7 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) {
key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
wcid->sw_iv = true;
+ wcid->tx_pn = atomic64_read(&key->tx_pn);
}
} else {
if (idx == wcid->hw_key_idx) {
--
2.17.0


2019-03-16 20:42:51

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH 2/6] mt76: reduce locking in mt76_dma_tx_cleanup

q->tail can be safely updated without locking, because there is no
concurrent access. If called from outside of the tasklet (for flushing),
the tasklet is always disabled.
q->queued can be safely read without locking, as long as the decrement
happens within the locked section.
This patch allows cleaning up tx packets outside of the section that holds
the queue lock for improved performance

Signed-off-by: Felix Fietkau <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 26 ++++++++++++++++--------
1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 09978757e7d1..7d8f9b8a81b5 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -149,31 +149,29 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
struct mt76_sw_queue *sq = &dev->q_tx[qid];
struct mt76_queue *q = sq->q;
struct mt76_queue_entry entry;
+ unsigned int n_swq_queued[4] = {};
+ unsigned int n_queued = 0;
bool wake = false;
- int last;
+ int i, last;

if (!q)
return;

- spin_lock_bh(&q->lock);
if (flush)
last = -1;
else
last = ioread32(&q->regs->dma_idx);

- while (q->queued && q->tail != last) {
+ while ((q->queued > n_queued) && q->tail != last) {
mt76_dma_tx_cleanup_idx(dev, q, q->tail, &entry);
if (entry.schedule)
- dev->q_tx[entry.qid].swq_queued--;
+ n_swq_queued[entry.qid]++;

q->tail = (q->tail + 1) % q->ndesc;
- q->queued--;
+ n_queued++;

- if (entry.skb) {
- spin_unlock_bh(&q->lock);
+ if (entry.skb)
dev->drv->tx_complete_skb(dev, qid, &entry);
- spin_lock_bh(&q->lock);
- }

if (entry.txwi) {
mt76_put_txwi(dev, entry.txwi);
@@ -184,6 +182,16 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
last = ioread32(&q->regs->dma_idx);
}

+ spin_lock_bh(&q->lock);
+
+ q->queued -= n_queued;
+ for (i = 0; i < ARRAY_SIZE(n_swq_queued); i++) {
+ if (!n_swq_queued[i])
+ continue;
+
+ dev->q_tx[i].swq_queued -= n_swq_queued[i];
+ }
+
if (flush)
mt76_dma_sync_idx(dev, q);

--
2.17.0


2019-03-16 22:28:37

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

Felix Fietkau <[email protected]> writes:

> Performance improvement and preparation for adding airtime fairness
> support

Great to see this! Do you have a plan for the airtime fairness part?
I.e., how to get the airtime information?

Only one other comment, below.

> Signed-off-by: Felix Fietkau <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/dma.c | 6 +-
> drivers/net/wireless/mediatek/mt76/mt76.h | 3 +-
> drivers/net/wireless/mediatek/mt76/tx.c | 98 ++++++++++-------------
> drivers/net/wireless/mediatek/mt76/usb.c | 3 +-
> 4 files changed, 50 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index 7b8a998103d7..09978757e7d1 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -184,9 +184,7 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
> last = ioread32(&q->regs->dma_idx);
> }
>
> - if (!flush)
> - mt76_txq_schedule(dev, sq);
> - else
> + if (flush)
> mt76_dma_sync_idx(dev, q);
>
> wake = wake && q->stopped &&
> @@ -199,6 +197,8 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
>
> spin_unlock_bh(&q->lock);
>
> + if (!flush)
> + mt76_txq_schedule(dev, qid);
> if (wake)
> ieee80211_wake_queue(dev->hw, qid);
> }
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index edff44f32c8e..5d44f721d184 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -224,7 +224,6 @@ struct mt76_wcid {
> };
>
> struct mt76_txq {
> - struct list_head list;
> struct mt76_sw_queue *swq;
> struct mt76_wcid *wcid;
>
> @@ -683,7 +682,7 @@ void mt76_txq_remove(struct mt76_dev *dev, struct ieee80211_txq *txq);
> void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
> void mt76_stop_tx_queues(struct mt76_dev *dev, struct ieee80211_sta *sta,
> bool send_bar);
> -void mt76_txq_schedule(struct mt76_dev *dev, struct mt76_sw_queue *sq);
> +void mt76_txq_schedule(struct mt76_dev *dev, enum mt76_txq_id qid);
> void mt76_txq_schedule_all(struct mt76_dev *dev);
> void mt76_release_buffered_frames(struct ieee80211_hw *hw,
> struct ieee80211_sta *sta,
> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
> index 2c82db0b5834..ef9a0bbd64c1 100644
> --- a/drivers/net/wireless/mediatek/mt76/tx.c
> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
> @@ -479,23 +479,37 @@ mt76_txq_send_burst(struct mt76_dev *dev, struct mt76_sw_queue *sq,
> }
>
> static int
> -mt76_txq_schedule_list(struct mt76_dev *dev, struct mt76_sw_queue *sq)
> +mt76_txq_schedule_list(struct mt76_dev *dev, enum mt76_txq_id qid)
> {
> + struct mt76_sw_queue *sq = &dev->q_tx[qid];
> struct mt76_queue *hwq = sq->q;
> - struct mt76_txq *mtxq, *mtxq_last;
> - int len = 0;
> + struct ieee80211_txq *txq;
> + struct mt76_txq *mtxq;
> + struct mt76_wcid *wcid;
> + int ret = 0;
>
> -restart:
> - mtxq_last = list_last_entry(&sq->swq, struct mt76_txq, list);
> - while (!list_empty(&sq->swq)) {
> + spin_lock_bh(&hwq->lock);
> + while (1) {
> bool empty = false;
> - int cur;
> +
> + if (sq->swq_queued >= 4)
> + break;
>
> if (test_bit(MT76_OFFCHANNEL, &dev->state) ||
> - test_bit(MT76_RESET, &dev->state))
> - return -EBUSY;
> + test_bit(MT76_RESET, &dev->state)) {
> + ret = -EBUSY;
> + break;
> + }
> +
> + txq = ieee80211_next_txq(dev->hw, qid);
> + if (!txq)
> + break;
> +
> + mtxq = (struct mt76_txq *)txq->drv_priv;
> + wcid = mtxq->wcid;
> + if (wcid && test_bit(MT_WCID_FLAG_PS, &wcid->flags))
> + continue;
>
> - mtxq = list_first_entry(&sq->swq, struct mt76_txq, list);
> if (mtxq->send_bar && mtxq->aggr) {
> struct ieee80211_txq *txq = mtxq_to_txq(mtxq);
> struct ieee80211_sta *sta = txq->sta;
> @@ -507,38 +521,36 @@ mt76_txq_schedule_list(struct mt76_dev *dev, struct mt76_sw_queue *sq)
> spin_unlock_bh(&hwq->lock);
> ieee80211_send_bar(vif, sta->addr, tid, agg_ssn);
> spin_lock_bh(&hwq->lock);
> - goto restart;
> }
>
> - list_del_init(&mtxq->list);
> -
> - cur = mt76_txq_send_burst(dev, sq, mtxq, &empty);
> + ret += mt76_txq_send_burst(dev, sq, mtxq, &empty);
> if (!empty)
> - list_add_tail(&mtxq->list, &sq->swq);
> -
> - if (cur < 0)
> - return cur;
> -
> - len += cur;
> -
> - if (mtxq == mtxq_last)
> - break;
> + ieee80211_return_txq(dev->hw, txq);

The call to ieee80211_return_txq() is really meant to be unconditional.
The TXQ will only actually be scheduled if it still has packets queued.
I know it's slightly more expensive to have the check in mac80211, but
this is what makes it possible to change the implementation without
touching the drivers (such as in the RFC patch I sent earlier that
switches the scheduling algorithm)...

-Toke

2019-03-17 10:44:34

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

On 2019-03-16 23:28, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <[email protected]> writes:
>
>> Performance improvement and preparation for adding airtime fairness
>> support
>
> Great to see this! Do you have a plan for the airtime fairness part?
> I.e., how to get the airtime information?
Not yet. Still need to investigate what kind of information the hardware
can provide. On a first glance it seems rather limited, so we may have
to approximate based on tx status rates/retry and average packet size.

> The call to ieee80211_return_txq() is really meant to be unconditional.
> The TXQ will only actually be scheduled if it still has packets queued.
> I know it's slightly more expensive to have the check in mac80211, but
> this is what makes it possible to change the implementation without
> touching the drivers (such as in the RFC patch I sent earlier that
> switches the scheduling algorithm)...
I think this API needs to be extended to allow the driver to specify
that it has buffered packets for a txq. Otherwise there's a small window
where the driver has packets for a txq but mac80211 doesn't, and
mac80211 won't schedule the queue in that case.
I'll send a patch for this soon.

- Felix

2019-03-17 11:33:22

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

Felix Fietkau <[email protected]> writes:

> On 2019-03-16 23:28, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <[email protected]> writes:
>>
>>> Performance improvement and preparation for adding airtime fairness
>>> support
>>
>> Great to see this! Do you have a plan for the airtime fairness part?
>> I.e., how to get the airtime information?
> Not yet. Still need to investigate what kind of information the hardware
> can provide. On a first glance it seems rather limited, so we may have
> to approximate based on tx status rates/retry and average packet size.

OK, cool. A byte-based estimator can also be useful for preventing dumb
firmware from buffering too much. The Chromium guys did that for ath10k:

https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/588190/13/drivers/net/wireless-4.2/ath/ath10k/mac.c#3826

>> The call to ieee80211_return_txq() is really meant to be unconditional.
>> The TXQ will only actually be scheduled if it still has packets queued.
>> I know it's slightly more expensive to have the check in mac80211, but
>> this is what makes it possible to change the implementation without
>> touching the drivers (such as in the RFC patch I sent earlier that
>> switches the scheduling algorithm)...
> I think this API needs to be extended to allow the driver to specify
> that it has buffered packets for a txq. Otherwise there's a small window
> where the driver has packets for a txq but mac80211 doesn't, and
> mac80211 won't schedule the queue in that case.
> I'll send a patch for this soon.

Right, makes sense. As long as mac80211 is in control over how it will
react to that information (thus allowing to e.g., invert the logic if
needed), I have no objections to extending the API... :)

-Toke

2019-03-17 12:35:00

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

On 2019-03-17 12:32, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <[email protected]> writes:
>
>> On 2019-03-16 23:28, Toke Høiland-Jørgensen wrote:
>>> Felix Fietkau <[email protected]> writes:
>>>
>>>> Performance improvement and preparation for adding airtime fairness
>>>> support
>>>
>>> Great to see this! Do you have a plan for the airtime fairness part?
>>> I.e., how to get the airtime information?
>> Not yet. Still need to investigate what kind of information the hardware
>> can provide. On a first glance it seems rather limited, so we may have
>> to approximate based on tx status rates/retry and average packet size.
>
> OK, cool. A byte-based estimator can also be useful for preventing dumb
> firmware from buffering too much. The Chromium guys did that for ath10k:
>
> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/588190/13/drivers/net/wireless-4.2/ath/ath10k/mac.c#3826
Interesting, thanks. I can probably use some ideas from that.

>>> The call to ieee80211_return_txq() is really meant to be unconditional.
>>> The TXQ will only actually be scheduled if it still has packets queued.
>>> I know it's slightly more expensive to have the check in mac80211, but
>>> this is what makes it possible to change the implementation without
>>> touching the drivers (such as in the RFC patch I sent earlier that
>>> switches the scheduling algorithm)...
>> I think this API needs to be extended to allow the driver to specify
>> that it has buffered packets for a txq. Otherwise there's a small window
>> where the driver has packets for a txq but mac80211 doesn't, and
>> mac80211 won't schedule the queue in that case.
>> I'll send a patch for this soon.
>
> Right, makes sense. As long as mac80211 is in control over how it will
> react to that information (thus allowing to e.g., invert the logic if
> needed), I have no objections to extending the API... :)I'm thinking of changing the code to make ieee80211_schedule_txq add the
txq to the list, even if mac80211 does not have any frames buffered for it.

I've looked at ath9k (the only user at the moment), and it seems to call
the function in that way already: at PS wake or tx status time if it has
frames in its internal retry queue.
While it does not match the current documented behavior for that
function, it nicely fits ath9k's currently unfulfilled expectations ;)

- Felix

2019-03-17 22:05:49

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

Felix Fietkau <[email protected]> writes:

> On 2019-03-17 12:32, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <[email protected]> writes:
>>
>>> On 2019-03-16 23:28, Toke Høiland-Jørgensen wrote:
>>>> Felix Fietkau <[email protected]> writes:
>>>>
>>>>> Performance improvement and preparation for adding airtime fairness
>>>>> support
>>>>
>>>> Great to see this! Do you have a plan for the airtime fairness part?
>>>> I.e., how to get the airtime information?
>>> Not yet. Still need to investigate what kind of information the hardware
>>> can provide. On a first glance it seems rather limited, so we may have
>>> to approximate based on tx status rates/retry and average packet size.
>>
>> OK, cool. A byte-based estimator can also be useful for preventing dumb
>> firmware from buffering too much. The Chromium guys did that for ath10k:
>>
>> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/588190/13/drivers/net/wireless-4.2/ath/ath10k/mac.c#3826
> Interesting, thanks. I can probably use some ideas from that.
>
>>>> The call to ieee80211_return_txq() is really meant to be unconditional.
>>>> The TXQ will only actually be scheduled if it still has packets queued.
>>>> I know it's slightly more expensive to have the check in mac80211, but
>>>> this is what makes it possible to change the implementation without
>>>> touching the drivers (such as in the RFC patch I sent earlier that
>>>> switches the scheduling algorithm)...
>>> I think this API needs to be extended to allow the driver to specify
>>> that it has buffered packets for a txq. Otherwise there's a small window
>>> where the driver has packets for a txq but mac80211 doesn't, and
>>> mac80211 won't schedule the queue in that case.
>>> I'll send a patch for this soon.
>>
>> Right, makes sense. As long as mac80211 is in control over how it will
>> react to that information (thus allowing to e.g., invert the logic if
>> needed), I have no objections to extending the API... :)I'm thinking of changing the code to make ieee80211_schedule_txq add the
> txq to the list, even if mac80211 does not have any frames buffered for it.
>
> I've looked at ath9k (the only user at the moment), and it seems to call
> the function in that way already: at PS wake or tx status time if it has
> frames in its internal retry queue.
> While it does not match the current documented behavior for that
> function, it nicely fits ath9k's currently unfulfilled expectations ;)

Heh, fair point :)

-Toke

2019-03-18 10:26:11

by Stanislaw Gruszka

[permalink] [raw]
Subject: Re: [PATCH 4/6] mt76: store software PN/IV in wcid

On Sat, Mar 16, 2019 at 09:42:40PM +0100, Felix Fietkau wrote:
> Avoids expensive 64-bit atomic access in the data path
<snip>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
> index d6e260ca1423..fb1961ac9dc6 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
> @@ -924,7 +924,11 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
> txwi[3] = cpu_to_le32(val);
>
> if (key) {
> - u64 pn = atomic64_inc_return(&key->tx_pn);
> + u64 pn;
> +
> + spin_lock(&dev->mt76.lock);
> + pn = ++wcid->tx_pn;
> + spin_unlock(&dev->mt76.lock);

It's interesting that atomic op is more expensive that taking spinlock
and do the operation under spinlock protection. This should not be
the case, atomic ops are provided exactly to avoid locking and should
be faster than spin_lock. Perhaps on architecture where this happen
(presumably MIPS 32bit) either atomic ops assembly code should be fixed
or arch should switch generic lib/atomic64.c .

Stanislaw

2019-03-18 10:37:40

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 4/6] mt76: store software PN/IV in wcid

On 2019-03-18 11:21, Stanislaw Gruszka wrote:
> On Sat, Mar 16, 2019 at 09:42:40PM +0100, Felix Fietkau wrote:
>> Avoids expensive 64-bit atomic access in the data path
> <snip>
>> diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
>> index d6e260ca1423..fb1961ac9dc6 100644
>> --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
>> +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
>> @@ -924,7 +924,11 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
>> txwi[3] = cpu_to_le32(val);
>>
>> if (key) {
>> - u64 pn = atomic64_inc_return(&key->tx_pn);
>> + u64 pn;
>> +
>> + spin_lock(&dev->mt76.lock);
>> + pn = ++wcid->tx_pn;
>> + spin_unlock(&dev->mt76.lock);
>
> It's interesting that atomic op is more expensive that taking spinlock
> and do the operation under spinlock protection. This should not be
> the case, atomic ops are provided exactly to avoid locking and should
> be faster than spin_lock. Perhaps on architecture where this happen
> (presumably MIPS 32bit) either atomic ops assembly code should be fixed
> or arch should switch generic lib/atomic64.c .
The fact that lib/atomic64.c is being used is the problem here.
The generic implementation takes a spin_lock_irqsave, which is more
expensive than the bh lock.

- Felix

2019-03-18 20:09:02

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

On 2019-03-17 22:59, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <[email protected]> writes:
>> I've looked at ath9k (the only user at the moment), and it seems to call
>> the function in that way already: at PS wake or tx status time if it has
>> frames in its internal retry queue.
>> While it does not match the current documented behavior for that
>> function, it nicely fits ath9k's currently unfulfilled expectations ;)
>
> Heh, fair point :)
I noticed another issue: after the migration to the mac80211 txq
scheduling code, ath9k does not handle stations going to powersave
properly anymore. mac80211 keeps returning txqs for stations that have
gone to sleep and ath9k will send out frames for them.

We could deal with this in the driver and simply not return queues for
stations in PS mode, or mac80211 could actively cancel them once a
station enters PS mode. What do you prefer?

- Felix

2019-03-18 22:21:31

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

Felix Fietkau <[email protected]> writes:

> On 2019-03-17 22:59, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <[email protected]> writes:
>>> I've looked at ath9k (the only user at the moment), and it seems to call
>>> the function in that way already: at PS wake or tx status time if it has
>>> frames in its internal retry queue.
>>> While it does not match the current documented behavior for that
>>> function, it nicely fits ath9k's currently unfulfilled expectations ;)
>>
>> Heh, fair point :)
> I noticed another issue: after the migration to the mac80211 txq
> scheduling code, ath9k does not handle stations going to powersave
> properly anymore. mac80211 keeps returning txqs for stations that have
> gone to sleep and ath9k will send out frames for them.

Ah, right. Never did have a good grip on the powersave code...

> We could deal with this in the driver and simply not return queues for
> stations in PS mode, or mac80211 could actively cancel them once a
> station enters PS mode. What do you prefer?

I think the cleanest would be if mac80211 handled it and just
un-scheduled stations when they go to sleep.

BTW, I was just thinking the other day about why the retry queue is kept
around when a station goes to sleep? Isn't the station usually sleeping
pretty long (>100 ms), where it might make more sense to drop things
rather than try again once i comes back?

-Toke

2019-03-18 22:37:13

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

On 2019-03-18 23:14, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <[email protected]> writes:
>
>> On 2019-03-17 22:59, Toke Høiland-Jørgensen wrote:
>>> Felix Fietkau <[email protected]> writes:
>>>> I've looked at ath9k (the only user at the moment), and it seems to call
>>>> the function in that way already: at PS wake or tx status time if it has
>>>> frames in its internal retry queue.
>>>> While it does not match the current documented behavior for that
>>>> function, it nicely fits ath9k's currently unfulfilled expectations ;)
>>>
>>> Heh, fair point :)
>> I noticed another issue: after the migration to the mac80211 txq
>> scheduling code, ath9k does not handle stations going to powersave
>> properly anymore. mac80211 keeps returning txqs for stations that have
>> gone to sleep and ath9k will send out frames for them.
>
> Ah, right. Never did have a good grip on the powersave code...
>
>> We could deal with this in the driver and simply not return queues for
>> stations in PS mode, or mac80211 could actively cancel them once a
>> station enters PS mode. What do you prefer?
>
> I think the cleanest would be if mac80211 handled it and just
> un-scheduled stations when they go to sleep.
I agree. I'll send a patch tomorrow.
> BTW, I was just thinking the other day about why the retry queue is kept
> around when a station goes to sleep? Isn't the station usually sleeping
> pretty long (>100 ms), where it might make more sense to drop things
> rather than try again once i comes back?
It doesn't always sleep long. It might just be background scanning.
There's no way for the AP to know in advance.
In any case, the A-MPDU receiver reorder window still needs to be
maintained, so dropping frames means we'd have to send a BAR frame to
notify the receiver, which costs airtime as well.

- Felix

2019-03-18 23:05:27

by Toke Høiland-Jørgensen

[permalink] [raw]
Subject: Re: [PATCH 1/6] mt76: use mac80211 txq scheduling

Felix Fietkau <[email protected]> writes:

> On 2019-03-18 23:14, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <[email protected]> writes:
>>
>>> On 2019-03-17 22:59, Toke Høiland-Jørgensen wrote:
>>>> Felix Fietkau <[email protected]> writes:
>>>>> I've looked at ath9k (the only user at the moment), and it seems to call
>>>>> the function in that way already: at PS wake or tx status time if it has
>>>>> frames in its internal retry queue.
>>>>> While it does not match the current documented behavior for that
>>>>> function, it nicely fits ath9k's currently unfulfilled expectations ;)
>>>>
>>>> Heh, fair point :)
>>> I noticed another issue: after the migration to the mac80211 txq
>>> scheduling code, ath9k does not handle stations going to powersave
>>> properly anymore. mac80211 keeps returning txqs for stations that have
>>> gone to sleep and ath9k will send out frames for them.
>>
>> Ah, right. Never did have a good grip on the powersave code...
>>
>>> We could deal with this in the driver and simply not return queues for
>>> stations in PS mode, or mac80211 could actively cancel them once a
>>> station enters PS mode. What do you prefer?
>>
>> I think the cleanest would be if mac80211 handled it and just
>> un-scheduled stations when they go to sleep.
> I agree. I'll send a patch tomorrow.

Cool, thanks! :)

>> BTW, I was just thinking the other day about why the retry queue is kept
>> around when a station goes to sleep? Isn't the station usually sleeping
>> pretty long (>100 ms), where it might make more sense to drop things
>> rather than try again once i comes back?
> It doesn't always sleep long. It might just be background scanning.
> There's no way for the AP to know in advance.
> In any case, the A-MPDU receiver reorder window still needs to be
> maintained, so dropping frames means we'd have to send a BAR frame to
> notify the receiver, which costs airtime as well.

Right, makes sense; thanks for the explanation.

-Toke