2021-03-15 09:33:16

by Yunsheng Lin

[permalink] [raw]
Subject: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

Currently qdisc_lock(q) is taken before enqueuing and dequeuing
for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
also taken, which can provide the same protection as qdisc_lock(q).

This patch removes the unnecessay qdisc_lock(q) protection for
lockless qdisc' skb_bad_txq/gso_skb queue.

And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
besides taking the qdisc_lock(q) when doing the qdisc reset,
some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
when checking qdisc status. It is unnecessary to take both lock
while the fast path only take one lock, so this patch also changes
it to only take qdisc_lock(q) for locked qdisc, and only take
qdisc->seqlock for lockless qdisc.

Since qdisc->seqlock is taken for lockless qdisc when calling
qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
to decide if the lockless qdisc is running.

Signed-off-by: Yunsheng Lin <[email protected]>
---
include/net/sch_generic.h | 2 --
net/sched/sch_generic.c | 72 +++++++++++++----------------------------------
2 files changed, 19 insertions(+), 55 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2d6eb60..0e497ed 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -139,8 +139,6 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)

static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
- if (qdisc->flags & TCQ_F_NOLOCK)
- return spin_is_locked(&qdisc->seqlock);
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93..a5f1e3c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(default_qdisc_ops);
/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
- * qdisc_lock(qdisc) spinlock.
+ * qdisc_lock(qdisc) or qdisc->seqlock spinlock.
*
* The idea is the following:
* - enqueue, dequeue are serialized via qdisc root lock
@@ -51,14 +51,8 @@ EXPORT_SYMBOL(default_qdisc_ops);
static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
const struct netdev_queue *txq = q->dev_queue;
- spinlock_t *lock = NULL;
struct sk_buff *skb;

- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
skb = skb_peek(&q->skb_bad_txq);
if (skb) {
/* check the reason of requeuing without tx lock first */
@@ -77,9 +71,6 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
}
}

- if (lock)
- spin_unlock(lock);
-
return skb;
}

@@ -96,13 +87,6 @@ static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
struct sk_buff *skb)
{
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
__skb_queue_tail(&q->skb_bad_txq, skb);

if (qdisc_is_percpu_stats(q)) {
@@ -112,20 +96,10 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
}
-
- if (lock)
- spin_unlock(lock);
}

static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
while (skb) {
struct sk_buff *next = skb->next;

@@ -144,8 +118,7 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)

skb = next;
}
- if (lock)
- spin_unlock(lock);
+
__netif_schedule(q);
}

@@ -207,24 +180,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,

*packets = 1;
if (unlikely(!skb_queue_empty(&q->gso_skb))) {
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }

skb = skb_peek(&q->gso_skb);

- /* skb may be null if another cpu pulls gso_skb off in between
- * empty check and lock.
- */
- if (!skb) {
- if (lock)
- spin_unlock(lock);
- goto validate;
- }
-
/* skb in gso_skb were already validated */
*validate = false;
if (xfrm_offload(skb))
@@ -243,11 +201,10 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
} else {
skb = NULL;
}
- if (lock)
- spin_unlock(lock);
+
goto trace;
}
-validate:
+
*validate = true;

if ((q->flags & TCQ_F_ONETXQUEUE) &&
@@ -1153,13 +1110,15 @@ static void dev_reset_queue(struct net_device *dev,

if (nolock)
spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
+ else
+ spin_lock_bh(qdisc_lock(qdisc));

qdisc_reset(qdisc);

- spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
+ else
+ spin_unlock_bh(qdisc_lock(qdisc));
}

static bool some_qdisc_is_busy(struct net_device *dev)
@@ -1168,20 +1127,27 @@ static bool some_qdisc_is_busy(struct net_device *dev)

for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *dev_queue;
- spinlock_t *root_lock;
struct Qdisc *q;
+ bool nolock;
int val;

dev_queue = netdev_get_tx_queue(dev, i);
q = dev_queue->qdisc_sleeping;

- root_lock = qdisc_lock(q);
- spin_lock_bh(root_lock);
+ nolock = q->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&q->seqlock);
+ else
+ spin_lock_bh(qdisc_lock(q));

val = (qdisc_is_running(q) ||
test_bit(__QDISC_STATE_SCHED, &q->state));

- spin_unlock_bh(root_lock);
+ if (nolock)
+ spin_unlock_bh(&q->seqlock);
+ else
+ spin_unlock_bh(qdisc_lock(q));

if (val)
return true;
--
2.7.4


2021-03-16 06:36:26

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

From: Yunsheng Lin <[email protected]>
Date: Mon, 15 Mar 2021 17:30:10 +0800

> Currently qdisc_lock(q) is taken before enqueuing and dequeuing
> for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
> also taken, which can provide the same protection as qdisc_lock(q).
>
> This patch removes the unnecessay qdisc_lock(q) protection for
> lockless qdisc' skb_bad_txq/gso_skb queue.
>
> And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
> besides taking the qdisc_lock(q) when doing the qdisc reset,
> some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
> when checking qdisc status. It is unnecessary to take both lock
> while the fast path only take one lock, so this patch also changes
> it to only take qdisc_lock(q) for locked qdisc, and only take
> qdisc->seqlock for lockless qdisc.
>
> Since qdisc->seqlock is taken for lockless qdisc when calling
> qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
> to decide if the lockless qdisc is running.
>
> Signed-off-by: Yunsheng Lin <[email protected]>

What about other things protected by this lock, such as statistics and qlen?

This change looks too risky to me.

2021-03-16 11:23:24

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On 2021/3/16 7:41, David Miller wrote:
> From: Yunsheng Lin <[email protected]>
> Date: Mon, 15 Mar 2021 17:30:10 +0800
>
>> Currently qdisc_lock(q) is taken before enqueuing and dequeuing
>> for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
>> also taken, which can provide the same protection as qdisc_lock(q).
>>
>> This patch removes the unnecessay qdisc_lock(q) protection for
>> lockless qdisc' skb_bad_txq/gso_skb queue.
>>
>> And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
>> besides taking the qdisc_lock(q) when doing the qdisc reset,
>> some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
>> when checking qdisc status. It is unnecessary to take both lock
>> while the fast path only take one lock, so this patch also changes
>> it to only take qdisc_lock(q) for locked qdisc, and only take
>> qdisc->seqlock for lockless qdisc.
>>
>> Since qdisc->seqlock is taken for lockless qdisc when calling
>> qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
>> to decide if the lockless qdisc is running.
>>
>> Signed-off-by: Yunsheng Lin <[email protected]>
>
> What about other things protected by this lock, such as statistics and qlen?
>
> This change looks too risky to me.

Ok, If that is the case, maybe we just remove qdisc->seqlock and use
qdisc_lock(q) for lockless qdisc too, so that we do not need to worry
about "lockless qdisc' other things protected by qdisc_lock(q)".

At least for the fast path, taking two locks for lockless qdisc hurts
performance when handling requeued skb, especially if the lockless
qdisc supports TCQ_F_CAN_BYPASS.

>
>
> .
>

2021-03-16 21:20:56

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On Mon, Mar 15, 2021 at 2:29 AM Yunsheng Lin <[email protected]> wrote:
>
> Currently qdisc_lock(q) is taken before enqueuing and dequeuing
> for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
> also taken, which can provide the same protection as qdisc_lock(q).
>
> This patch removes the unnecessay qdisc_lock(q) protection for
> lockless qdisc' skb_bad_txq/gso_skb queue.
>
> And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
> besides taking the qdisc_lock(q) when doing the qdisc reset,
> some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
> when checking qdisc status. It is unnecessary to take both lock
> while the fast path only take one lock, so this patch also changes
> it to only take qdisc_lock(q) for locked qdisc, and only take
> qdisc->seqlock for lockless qdisc.
>
> Since qdisc->seqlock is taken for lockless qdisc when calling
> qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
> to decide if the lockless qdisc is running.

What's the benefit here? Since qdisc->q.lock is also per-qdisc,
so there is no actual contention to take it when we already acquire
q->seqlock, right?

Also, is ->seqlock supposed to be used for protecting skb_bad_txq
etc.? From my understanding, it was introduced merely for replacing
__QDISC_STATE_RUNNING. If you want to extend it, you probably
have to rename it too.

Thanks.

2021-03-16 21:22:15

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On Mon, Mar 15, 2021 at 4:42 PM David Miller <[email protected]> wrote:
>
> From: Yunsheng Lin <[email protected]>
> Date: Mon, 15 Mar 2021 17:30:10 +0800
>
> > Currently qdisc_lock(q) is taken before enqueuing and dequeuing
> > for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
> > also taken, which can provide the same protection as qdisc_lock(q).
> >
> > This patch removes the unnecessay qdisc_lock(q) protection for
> > lockless qdisc' skb_bad_txq/gso_skb queue.
> >
> > And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
> > besides taking the qdisc_lock(q) when doing the qdisc reset,
> > some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
> > when checking qdisc status. It is unnecessary to take both lock
> > while the fast path only take one lock, so this patch also changes
> > it to only take qdisc_lock(q) for locked qdisc, and only take
> > qdisc->seqlock for lockless qdisc.
> >
> > Since qdisc->seqlock is taken for lockless qdisc when calling
> > qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
> > to decide if the lockless qdisc is running.
> >
> > Signed-off-by: Yunsheng Lin <[email protected]>
>
> What about other things protected by this lock, such as statistics and qlen?
>
> This change looks too risky to me.

They are per-cpu for pfifo_fast which sets TCQ_F_CPUSTATS too.

Thanks.

2021-03-16 22:56:16

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

From: Yunsheng Lin <[email protected]>
Date: Tue, 16 Mar 2021 10:40:56 +0800

> On 2021/3/16 7:41, David Miller wrote:
>> From: Yunsheng Lin <[email protected]>
>
> At least for the fast path, taking two locks for lockless qdisc hurts
> performance when handling requeued skb, especially if the lockless
> qdisc supports TCQ_F_CAN_BYPASS.

The bad txq and gro skb cases are not "fast path", sorry

2021-03-17 00:54:27

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On 2021/3/17 5:45, David Miller wrote:
> From: Yunsheng Lin <[email protected]>
> Date: Tue, 16 Mar 2021 10:40:56 +0800
>
>> On 2021/3/16 7:41, David Miller wrote:
>>> From: Yunsheng Lin <[email protected]>
>>
>> At least for the fast path, taking two locks for lockless qdisc hurts
>> performance when handling requeued skb, especially if the lockless
>> qdisc supports TCQ_F_CAN_BYPASS.
>
> The bad txq and gro skb cases are not "fast path", sorry

You are right, it is more of exceptional data path, but it is still
ok to clean that up without obvious risk, right?
For I am going to replace qdisc->seqlock with qdisc_lock(q) for lockless
qdisc too and remove qdisc->seqlock, which makes more sense.


>
> .
>

2021-03-17 00:56:28

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On 2021/3/17 2:43, Cong Wang wrote:
> On Mon, Mar 15, 2021 at 4:42 PM David Miller <[email protected]> wrote:
>>
>> From: Yunsheng Lin <[email protected]>
>> Date: Mon, 15 Mar 2021 17:30:10 +0800
>>
>>> Currently qdisc_lock(q) is taken before enqueuing and dequeuing
>>> for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
>>> also taken, which can provide the same protection as qdisc_lock(q).
>>>
>>> This patch removes the unnecessay qdisc_lock(q) protection for
>>> lockless qdisc' skb_bad_txq/gso_skb queue.
>>>
>>> And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
>>> besides taking the qdisc_lock(q) when doing the qdisc reset,
>>> some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
>>> when checking qdisc status. It is unnecessary to take both lock
>>> while the fast path only take one lock, so this patch also changes
>>> it to only take qdisc_lock(q) for locked qdisc, and only take
>>> qdisc->seqlock for lockless qdisc.
>>>
>>> Since qdisc->seqlock is taken for lockless qdisc when calling
>>> qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
>>> to decide if the lockless qdisc is running.
>>>
>>> Signed-off-by: Yunsheng Lin <[email protected]>
>>
>> What about other things protected by this lock, such as statistics and qlen?
>>
>> This change looks too risky to me.
>
> They are per-cpu for pfifo_fast which sets TCQ_F_CPUSTATS too.

Did you mean qdisc_lock(q) are protecting per-cpu stats for
pfifo_fast too?

>
> Thanks.
>
> .
>

2021-03-17 01:08:56

by Yunsheng Lin

[permalink] [raw]
Subject: Re: [PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

On 2021/3/17 2:41, Cong Wang wrote:
> On Mon, Mar 15, 2021 at 2:29 AM Yunsheng Lin <[email protected]> wrote:
>>
>> Currently qdisc_lock(q) is taken before enqueuing and dequeuing
>> for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
>> also taken, which can provide the same protection as qdisc_lock(q).
>>
>> This patch removes the unnecessay qdisc_lock(q) protection for
>> lockless qdisc' skb_bad_txq/gso_skb queue.
>>
>> And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
>> besides taking the qdisc_lock(q) when doing the qdisc reset,
>> some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
>> when checking qdisc status. It is unnecessary to take both lock
>> while the fast path only take one lock, so this patch also changes
>> it to only take qdisc_lock(q) for locked qdisc, and only take
>> qdisc->seqlock for lockless qdisc.
>>
>> Since qdisc->seqlock is taken for lockless qdisc when calling
>> qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
>> to decide if the lockless qdisc is running.
>
> What's the benefit here? Since qdisc->q.lock is also per-qdisc,
> so there is no actual contention to take it when we already acquire
> q->seqlock, right?

Yes, there is no actual contention to take qdisc->q.lock while
q->seqlock is acquired, but a cleanup or minor optimization.

>
> Also, is ->seqlock supposed to be used for protecting skb_bad_txq
> etc.? From my understanding, it was introduced merely for replacing
> __QDISC_STATE_RUNNING. If you want to extend it, you probably
> have to rename it too.

How about just using qdisc->q.lock for lockless qdisc too and remove
dqisc->seqlock completely?

>
> Thanks.
>
> .
>