Holding the lock around the entire duration of tx scheduling can create
some nasty lock contention, especially when processing airtime information
from the tx status or the rx path.
Improve locking by only holding the active_txq_lock for lookups / scheduling
list modifications.
Signed-off-by: Felix Fietkau <[email protected]>
---
include/net/mac80211.h | 49 ++++++++++++++++--------------------------
net/mac80211/tx.c | 44 ++++++++++++++-----------------------
2 files changed, 35 insertions(+), 58 deletions(-)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3771625b7a9d..0de0aba580eb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6269,8 +6269,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to return packets from.
*
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
* Returns the next txq if successful, %NULL if no queue is eligible. If a txq
* is returned, it should be returned with ieee80211_return_txq() after the
* driver has finished scheduling it.
@@ -6278,51 +6276,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
/**
- * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
- *
- * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
- *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
- */
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
-
-/**
- * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ * ieee80211_txq_schedule_start - start new scheduling round for TXQs
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to acquire locks for
*
- * Acquire locks needed to schedule TXQs from the given AC. Should be called
- * before ieee80211_next_txq() or ieee80211_return_txq().
+ * Should be called before ieee80211_next_txq() or ieee80211_return_txq().
+ * The driver must not call multiple TXQ scheduling rounds concurrently.
*/
-void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock);
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
+
+/* (deprecated) */
+static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+{
+}
/**
- * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
- * @ac: AC number to acquire locks for
+ * @txq: pointer obtained from station or virtual interface
*
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Schedules a TXQ for transmission if it is not already scheduled.
*/
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock);
+void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
/**
- * ieee80211_schedule_txq - schedule a TXQ for transmission
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface
- *
- * Schedules a TXQ for transmission if it is not already scheduled. Takes a
- * lock, which means it must *not* be called between
- * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
*/
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock);
+static inline void
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+ ieee80211_schedule_txq(hw, txq);
+}
/**
* ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ca23abbf5c0b..51cc37802439 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3653,16 +3653,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_txq *ret = NULL;
struct txq_info *txqi = NULL;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
begin:
txqi = list_first_entry_or_null(&local->active_txqs[ac],
struct txq_info,
schedule_order);
if (!txqi)
- return NULL;
+ goto out;
if (txqi->txq.sta) {
struct sta_info *sta = container_of(txqi->txq.sta,
@@ -3679,21 +3680,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
if (txqi->schedule_round == local->schedule_round[ac])
- return NULL;
+ goto out;
list_del_init(&txqi->schedule_order);
txqi->schedule_round = local->schedule_round[ac];
- return &txqi->txq;
+ ret = &txqi->txq;
+
+out:
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ return ret;
}
EXPORT_SYMBOL(ieee80211_next_txq);
-void ieee80211_return_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
- lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+ spin_lock_bh(&local->active_txq_lock[txq->ac]);
if (list_empty(&txqi->schedule_order) &&
(!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
@@ -3713,17 +3718,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
list_add_tail(&txqi->schedule_order,
&local->active_txqs[txq->ac]);
}
-}
-EXPORT_SYMBOL(ieee80211_return_txq);
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- spin_lock_bh(&local->active_txq_lock[txq->ac]);
- ieee80211_return_txq(hw, txq);
spin_unlock_bh(&local->active_txq_lock[txq->ac]);
}
EXPORT_SYMBOL(ieee80211_schedule_txq);
@@ -3736,7 +3731,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct sta_info *sta;
u8 ac = txq->ac;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
if (!txqi->txq.sta)
goto out;
@@ -3766,34 +3761,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
sta->airtime[ac].deficit += sta->airtime_weight;
list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return false;
out:
if (!list_empty(&txqi->schedule_order))
list_del_init(&txqi->schedule_order);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return true;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock)
{
struct ieee80211_local *local = hw_to_local(hw);
spin_lock_bh(&local->active_txq_lock[ac]);
local->schedule_round[ac]++;
-}
-EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
spin_unlock_bh(&local->active_txq_lock[ac]);
}
-EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
--
2.17.0
Felix Fietkau <[email protected]> writes:
> Holding the lock around the entire duration of tx scheduling can create
> some nasty lock contention, especially when processing airtime information
> from the tx status or the rx path.
> Improve locking by only holding the active_txq_lock for lookups / scheduling
> list modifications.
>
> Signed-off-by: Felix Fietkau <[email protected]>
Acked-by: Toke Høiland-Jørgensen <[email protected]>
> include/net/mac80211.h | 49 ++++++++++++++++--------------------------
> net/mac80211/tx.c | 44 ++++++++++++++-----------------------
> 2 files changed, 35 insertions(+), 58 deletions(-)
>
> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
> index 3771625b7a9d..0de0aba580eb 100644
> --- a/include/net/mac80211.h
> +++ b/include/net/mac80211.h
[ ... ]
> -void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
> - __acquires(txq_lock);
> +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
> +
> +/* (deprecated) */
> +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
> +{
> +}
I figure I'll post a cleanup of this as part of my reworked schedule
change patch; since I'll be messing around with these bits anyway...
-Toke
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index ca23abbf5c0b..51cc37802439 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -3653,16 +3653,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
> struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8
> ac)
> {
> struct ieee80211_local *local = hw_to_local(hw);
> + struct ieee80211_txq *ret = NULL;
> struct txq_info *txqi = NULL;
>
> - lockdep_assert_held(&local->active_txq_lock[ac]);
> + spin_lock_bh(&local->active_txq_lock[ac]);
>
> begin:
> txqi = list_first_entry_or_null(&local->active_txqs[ac],
> struct txq_info,
> schedule_order);
> if (!txqi)
> - return NULL;
> + goto out;
>
> if (txqi->txq.sta) {
> struct sta_info *sta = container_of(txqi->txq.sta,
> @@ -3679,21 +3680,25 @@ struct ieee80211_txq
> *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
>
>
> if (txqi->schedule_round == local->schedule_round[ac])
> - return NULL;
> + goto out;
>
> list_del_init(&txqi->schedule_order);
> txqi->schedule_round = local->schedule_round[ac];
> - return &txqi->txq;
> + ret = &txqi->txq;
> +
> +out:
> + spin_unlock_bh(&local->active_txq_lock[ac]);
> + return ret;
> }
> EXPORT_SYMBOL(ieee80211_next_txq);
>
> -void ieee80211_return_txq(struct ieee80211_hw *hw,
> - struct ieee80211_txq *txq)
> +void ieee80211_schedule_txq(struct ieee80211_hw *hw,
> + struct ieee80211_txq *txq)
> {
> struct ieee80211_local *local = hw_to_local(hw);
> struct txq_info *txqi = to_txq_info(txq);
>
> - lockdep_assert_held(&local->active_txq_lock[txq->ac]);
> + spin_lock_bh(&local->active_txq_lock[txq->ac]);
>
> if (list_empty(&txqi->schedule_order) &&
> (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
> @@ -3713,17 +3718,7 @@ void ieee80211_return_txq(struct ieee80211_hw
> *hw,
> list_add_tail(&txqi->schedule_order,
> &local->active_txqs[txq->ac]);
> }
> -}
> -EXPORT_SYMBOL(ieee80211_return_txq);
>
> -void ieee80211_schedule_txq(struct ieee80211_hw *hw,
> - struct ieee80211_txq *txq)
> - __acquires(txq_lock) __releases(txq_lock)
> -{
> - struct ieee80211_local *local = hw_to_local(hw);
> -
> - spin_lock_bh(&local->active_txq_lock[txq->ac]);
> - ieee80211_return_txq(hw, txq);
> spin_unlock_bh(&local->active_txq_lock[txq->ac]);
> }
> EXPORT_SYMBOL(ieee80211_schedule_txq);
> @@ -3736,7 +3731,7 @@ bool ieee80211_txq_may_transmit(struct
> ieee80211_hw *hw,
> struct sta_info *sta;
> u8 ac = txq->ac;
>
> - lockdep_assert_held(&local->active_txq_lock[ac]);
> + spin_lock_bh(&local->active_txq_lock[ac]);
>
> if (!txqi->txq.sta)
> goto out;
> @@ -3766,34 +3761,27 @@ bool ieee80211_txq_may_transmit(struct
> ieee80211_hw *hw,
>
> sta->airtime[ac].deficit += sta->airtime_weight;
> list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
> + spin_unlock_bh(&local->active_txq_lock[ac]);
>
> return false;
> out:
> if (!list_empty(&txqi->schedule_order))
> list_del_init(&txqi->schedule_order);
> + spin_unlock_bh(&local->active_txq_lock[ac]);
>
> return true;
> }
> EXPORT_SYMBOL(ieee80211_txq_may_transmit);
>
> void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
> - __acquires(txq_lock)
> {
> struct ieee80211_local *local = hw_to_local(hw);
>
> spin_lock_bh(&local->active_txq_lock[ac]);
> local->schedule_round[ac]++;
Hi Felix,
In ath10k, we might have situations like,
CPU0 CPU1
... ieee80211_txq_schedule_start()
... while(ieee80211_next_txq()) {
... driver dequeue packets from
txq
... ieee80211_return_txq()
ieee80211_txq_schedule_start() ...
ieee80211_next_txq() }
driver dequeue packets from txq ieee80211_txq_schedule_end()
ieee80211_return_txq()
ieee80211_txq_schedule_end()
The problem is while CPU1 is looping to dequeue txqs, the schedule_round
which is used to make sure no infinite loop will be changed in CPU0. So
a txq has already been scheduled will be scheduled again then the loop
won't be end at that point as our expected.
> -}
> -EXPORT_SYMBOL(ieee80211_txq_schedule_start);
> -
> -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
> - __releases(txq_lock)
> -{
> - struct ieee80211_local *local = hw_to_local(hw);
> -
> spin_unlock_bh(&local->active_txq_lock[ac]);
> }
> -EXPORT_SYMBOL(ieee80211_txq_schedule_end);
> +EXPORT_SYMBOL(ieee80211_txq_schedule_start);
>
> void __ieee80211_subif_start_xmit(struct sk_buff *skb,
> struct net_device *dev,
--
Yibo
On 2019-10-10 04:43, Yibo Zhao wrote:
> Hi Felix,
>
> In ath10k, we might have situations like,
>
> CPU0 CPU1
> ... ieee80211_txq_schedule_start()
> ... while(ieee80211_next_txq()) {
> ... driver dequeue packets from
> txq
> ... ieee80211_return_txq()
> ieee80211_txq_schedule_start() ...
> ieee80211_next_txq() }
> driver dequeue packets from txq ieee80211_txq_schedule_end()
> ieee80211_return_txq()
> ieee80211_txq_schedule_end()
>
> The problem is while CPU1 is looping to dequeue txqs, the schedule_round
> which is used to make sure no infinite loop will be changed in CPU0. So
> a txq has already been scheduled will be scheduled again then the loop
> won't be end at that point as our expected.
Hi,
I think this is something that should be fixed in the driver. The
comment for ieee80211_txq_schedule_start() already states:
"The driver must not call multiple TXQ scheduling rounds concurrently"
The way that I'm dealing with this in mt76 is that I ensure that all txq
scheduling is done from a single tasklet, which is scheduled from the tx
status and the queue wake path.
I think that approach should perform well in ath10k as well, and it will
avoid the cost of waiting for a lock on one CPU while the other is
scheduling tx already.
- Felix