2020-09-15 12:15:07

by jun qian

Subject: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

From: jun qian <[email protected]>

Allow terminating the softirq processing loop without finishing the vectors.

Signed-off-by: jun qian <[email protected]>
---
kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
return false;
}

+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * The pending_next_bit is recorded for the next processing order when
+ * the loop is broken. This per cpu variable is to solve the following
+ * scenarios:
+ * Assume bit 0 and 1 are pending when the processing starts. Now it
+ * breaks out after bit 0 has been handled and stores back bit 1 as
+ * pending. Before ksoftirqd runs bit 0 gets raised again. ksoftirqd
+ * runs and handles bit 0, which takes more than the timeout. As a
+ * result the bit 0 processing can starve all other softirqs.
+ *
+ * so we need the pending_next_bit to record the next process order.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
unsigned int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
unsigned long pending;
+ unsigned long pending_left, pending_again;
unsigned int vec_nr;
bool in_hardirq;
+ int next_bit;
+ unsigned long flags;

/*
* Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)

local_irq_enable();

- for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
- int prev_count;
-
- __clear_bit(vec_nr, &pending);
-
- h = softirq_vec + vec_nr;
-
- prev_count = preempt_count();
-
- kstat_incr_softirqs_this_cpu(vec_nr);
+ /*
+ * pending_left means that the left bits unhandled when the loop is
+ * broken without finishing the vectors. These bits will be handled
+ * first in the next time. pending_again means that the new bits is
+ * generated in the other time. These bits should be handled after
+ * the pending_left bits have been handled.
+ *
+ * For example
+ * If the pending bits is 1101010110, and the loop is broken after
+ * the bit4 is handled. Then, the pending_next_bit will be 5, and
+ * the pending_left is 1101000000, the pending_again is 000000110.
+ */
+ next_bit = __this_cpu_read(pending_next_bit);
+ pending_left = pending &
+ (SOFTIRQ_PENDING_MASK << next_bit);
+ pending_again = pending &
+ (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+ while (pending_left || pending_again) {
+ if (pending_left) {
+ pending = pending_left;
+ pending_left = 0;
+ } else if (pending_again) {
+ pending = pending_again;
+ pending_again = 0;
+ } else
+ break;
+ for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+ int prev_count;
+
+ __clear_bit(vec_nr, &pending);
+
+ h = softirq_vec + vec_nr;
+
+ prev_count = preempt_count();
+
+ kstat_incr_softirqs_this_cpu(vec_nr);
+
+ trace_softirq_entry(vec_nr);
+ h->action(h);
+ trace_softirq_exit(vec_nr);
+ if (unlikely(prev_count != preempt_count())) {
+ pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
+ vec_nr, softirq_to_name[vec_nr], h->action,
+ prev_count, preempt_count());
+ preempt_count_set(prev_count);
+ }

- trace_softirq_entry(vec_nr);
- h->action(h);
- trace_softirq_exit(vec_nr);
- if (unlikely(prev_count != preempt_count())) {
- pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
- vec_nr, softirq_to_name[vec_nr], h->action,
- prev_count, preempt_count());
- preempt_count_set(prev_count);
+ /* Allow early break to avoid big sched delay */
+ if (pending && __softirq_needs_break(start)) {
+ __this_cpu_write(pending_next_bit, vec_nr + 1);
+ /*
+ * Ensure that the remaining pending bits will be
+ * handled in the next time.
+ */
+ local_irq_save(flags);
+ or_softirq_pending(pending | pending_again);
+ local_irq_restore(flags);
+ break;
+ }
}
}

@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
rcu_softirq_qs();
local_irq_disable();

- pending = local_softirq_pending();
- if (pending) {
- if (!__softirq_needs_break(start) && --max_restart)
- goto restart;
+ /* get the unhandled bits */
+ pending |= pending_again;
+ if (!pending)
+ /*
+ * If all of the pending bits have been handled,
+ * reset the pending_next_bit to 0.
+ */
+ __this_cpu_write(pending_next_bit, 0);

+ if (pending)
wakeup_softirqd();
+ else if (!__softirq_needs_break(start) && --max_restart) {
+ pending = local_softirq_pending();
+ if (pending)
+ goto restart;
}

lockdep_softirq_end(in_hardirq);
--
1.8.3.1


2020-09-24 15:41:54

by Thomas Gleixner

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Tue, Sep 15 2020 at 19:56, qianjun kernel wrote:
>
> +#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
>
> +/*
> + * The pending_next_bit is recorded for the next processing order when
> + * the loop is broken. This per cpu variable is to solve the following
> + * scenarios:
> + * Assume bit 0 and 1 are pending when the processing starts. Now it
> + * breaks out after bit 0 has been handled and stores back bit 1 as
> + * pending. Before ksoftirqd runs bit 0 gets raised again. ksoftirqd
> + * runs and handles bit 0, which takes more than the timeout. As a
> + * result the bit 0 processing can starve all other softirqs.
> + *
> + * so we need the pending_next_bit to record the next process order.
> + */
> +DEFINE_PER_CPU(u32, pending_next_bit);

static if at all.

> +
> asmlinkage __visible void __softirq_entry __do_softirq(void)
> {
> u64 start = sched_clock();
> @@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> unsigned int max_restart = MAX_SOFTIRQ_RESTART;
> struct softirq_action *h;
> unsigned long pending;
> + unsigned long pending_left, pending_again;
> unsigned int vec_nr;
> bool in_hardirq;
> + int next_bit;
> + unsigned long flags;
>
> /*
> * Mask out PF_MEMALLOC as the current task context is borrowed for the
> @@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
>
> local_irq_enable();
>
> - for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> - int prev_count;
> -
> - __clear_bit(vec_nr, &pending);
> -
> - h = softirq_vec + vec_nr;
> -
> - prev_count = preempt_count();
> -
> - kstat_incr_softirqs_this_cpu(vec_nr);
> + /*
> + * pending_left means that the left bits unhandled when the loop is
> + * broken without finishing the vectors. These bits will be handled
> + * first in the next time. pending_again means that the new bits is
> + * generated in the other time. These bits should be handled after
> + * the pending_left bits have been handled.
> + *
> + * For example
> + * If the pending bits is 1101010110, and the loop is broken after
> + * the bit4 is handled. Then, the pending_next_bit will be 5, and
> + * the pending_left is 1101000000, the pending_again is 000000110.
> + */

If you need such a comment to explain the meaning of your variables then
you did something fundamentally wrong.

> + next_bit = __this_cpu_read(pending_next_bit);
> + pending_left = pending &
> + (SOFTIRQ_PENDING_MASK << next_bit);
> + pending_again = pending &
> + (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
> +
> + while (pending_left || pending_again) {
> + if (pending_left) {
> + pending = pending_left;
> + pending_left = 0;
> + } else if (pending_again) {
> + pending = pending_again;
> + pending_again = 0;
> + } else
> + break;

Aside from the missing brackets, how is that 'else' path ever going to be
reached?

But TBH that whole patch is a completely unreviewable maze.

This can be done without all this pending, pending_left, pending_again,
pending_next_bit, next_bit convolution. It's inconsistent anyway:

__do_softirq()

pending = 0x25;
next = 0;

for (...)
break after bit 0

==> pending == 0x24

==> next = 2

now on the next invocation

pending = 0x35;
next = 2;

So the processing order is 2, 4, 5, 0

and there is nothing you can do about that with that approach.

But the whole point is to ensure that the not yet processed bits are
processed first.
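
(For illustration, a standalone userspace sketch of the pending_left/pending_again
split quoted above, reproducing the 2, 4, 5, 0 order from the example. The
constants mirror the patch; everything else is made up for the demo and is not
kernel code.)

#include <stdio.h>

#define NR_SOFTIRQS             10
#define SOFTIRQ_PENDING_MASK    ((1UL << NR_SOFTIRQS) - 1)

int main(void)
{
        /* second invocation from the example: bit 0 got re-raised, next_bit is 2 */
        unsigned long pending = 0x35;   /* bits 0, 2, 4, 5 */
        int next_bit = 2;

        unsigned long pending_left  = pending & (SOFTIRQ_PENDING_MASK << next_bit);
        unsigned long pending_again = pending & (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));

        printf("processing order:");
        for (int vec = 0; vec < NR_SOFTIRQS; vec++)     /* leftover bits first */
                if (pending_left & (1UL << vec))
                        printf(" %d", vec);
        for (int vec = 0; vec < NR_SOFTIRQS; vec++)     /* then the newly raised ones */
                if (pending_again & (1UL << vec))
                        printf(" %d", vec);
        printf("\n");                                   /* prints: processing order: 2 4 5 0 */
        return 0;
}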

Find attached an updated series based on the original one from Peter
with the authorship preserved, intact SOB chains and proper changelogs.

The last one is new and addresses the starvation issue in a readable
way.

All of this is again completely untested.

Thanks,

tglx


Attachments:
peterz-softirq-fix-loop.patch (1.95 kB)
peterz-softirq-timo.patch (2.12 kB)
peterz-softirq-needs-break.patch (3.05 kB)
peterz-softirq-break-more.patch (2.50 kB)
tglx-softirq-prevent-starvation-of-higher-softirq-vectors.patch (4.02 kB)

2020-09-24 23:10:47

by Frederic Weisbecker

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> Subject: softirq; Prevent starvation of higher softirq vectors
> From: Thomas Gleixner <[email protected]>
> Date: Thu, 24 Sep 2020 10:40:24 +0200
>
> From: Thomas Gleixner <[email protected]>
>
> The early termination of the softirq processing loop can lead to starvation
> of the higher numbered soft interrupt vectors because each run starts at
> the lowest bit. If the loop terminates then the already processed bits can
> be raised again before the next loop starts. If these lower bits run into
> the termination again, then a re-raise might starve the higher bits forever.
>
> To prevent this, store the leftovers of the previous run in the upper 16
> bit of the local softirq_pending storage and ensure that these are
> processed before any newly raised bits are handled.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> kernel/softirq.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 50 insertions(+), 8 deletions(-)
>
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -259,11 +259,23 @@ static inline bool __softirq_needs_break
> return need_resched() || __softirq_timeout(tbreak);
> }
>
> +/*
> + * local_softirq_pending() is split into two 16 bit words. The low word
> + * contains the bits set by raise_softirq(), the high word contains pending
> + * bits which have not been processed in an early terminated run. This is
> + * required to prevent starvation of the higher numbered softirqs.
> + */
> +#define SIRQ_PREV_SHIFT 16

Note that in the case of x86, irq_start.__softirq_pending is a u16.

The origin is there: 9aee5f8a7e30330d0a8f4c626dc924ca5590aba5
"x86/irq: Demote irq_cpustat_t::__softirq_pending to u16"

2020-09-24 23:14:44

by Frederic Weisbecker

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Fri, Sep 25, 2020 at 01:08:11AM +0200, Frederic Weisbecker wrote:
> On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> > Subject: softirq; Prevent starvation of higher softirq vectors
> > From: Thomas Gleixner <[email protected]>
> > Date: Thu, 24 Sep 2020 10:40:24 +0200
> >
> > From: Thomas Gleixner <[email protected]>
> >
> > The early termination of the softirq processing loop can lead to starvation
> > of the higher numbered soft interrupt vectors because each run starts at
> > the lowest bit. If the loop terminates then the already processed bits can
> > be raised again before the next loop starts. If these lower bits run into
> > the termination again, then a re-raise might starve the higher bits forever.
> >
> > To prevent this, store the leftovers of the previous run in the upper 16
> > bit of the local softirq_pending storage and ensure that these are
> > processed before any newly raised bits are handled.
> >
> > Signed-off-by: Thomas Gleixner <[email protected]>
> > ---
> > kernel/softirq.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 50 insertions(+), 8 deletions(-)
> >
> > --- a/kernel/softirq.c
> > +++ b/kernel/softirq.c
> > @@ -259,11 +259,23 @@ static inline bool __softirq_needs_break
> > return need_resched() || __softirq_timeout(tbreak);
> > }
> >
> > +/*
> > + * local_softirq_pending() is split into two 16 bit words. The low word
> > + * contains the bits set by raise_softirq(), the high word contains pending
> > + * bits which have not been processed in an early terminated run. This is
> > + * required to prevent starvation of the higher numbered softirqs.
> > + */
> > +#define SIRQ_PREV_SHIFT 16
>
> Note that in the case of x86, irq_start.__softirq_pending is a u16.

irq_stat even

2020-09-25 00:43:45

by Frederic Weisbecker

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> Subject: softirq; Prevent starvation of higher softirq vectors
[...]
> + /*
> + * Word swap pending to move the not yet handled bits of the previous
> + * run first and then clear the duplicates in the newly raised ones.
> + */
> + swahw32s(&cur_pending);
> + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);
> +
> for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> int prev_count;
>
> + vec_nr &= SIRQ_VECTOR_MASK;

Shouldn't NR_SOFTIRQS above protect from that?

> __clear_bit(vec_nr, &pending);
> kstat_incr_softirqs_this_cpu(vec_nr);
>
[...]
> + } else {
> + /*
> + * Retain the unprocessed bits and swap @cur_pending back
> + * into normal ordering
> + */
> + cur_pending = (u32)pending;
> + swahw32s(&cur_pending);
> + /*
> + * If the previous bits are done move the low word of
> + * @pending into the high word so it's processed first.
> + */
> + if (!(cur_pending & SIRQ_PREV_MASK))
> + cur_pending <<= SIRQ_PREV_SHIFT;

If the previous bits are done and there is no timeout, should
we consider restarting the loop?

A common case would be to enter do_softirq() with RCU_SOFTIRQ set
in the SIRQ_PREV_MASK and NET_RX_SOFTIRQ set in the normal mask.

You would always end up processing the RCU_SOFTIRQ here and trigger
ksoftirqd for the NET_RX_SOFTIRQ.

Although that's probably no big deal as we should be already in ksoftirqd
if we processed prev bits. We are just going to iterate the kthread loop
instead of the do_softirq loop. Probably no real issue then...


>
> + /* Merge the newly pending ones into the low word */
> + cur_pending |= new_pending;
> + }
> + set_softirq_pending(cur_pending);
> wakeup_softirqd();
> out:
> lockdep_softirq_end(in_hardirq);

2020-09-25 22:39:11

by Thomas Gleixner

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Fri, Sep 25 2020 at 01:08, Frederic Weisbecker wrote:

> On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
>> Subject: softirq; Prevent starvation of higher softirq vectors
>> From: Thomas Gleixner <[email protected]>
>> Date: Thu, 24 Sep 2020 10:40:24 +0200
>>
>> From: Thomas Gleixner <[email protected]>
>>
>> The early termination of the softirq processing loop can lead to starvation
>> of the higher numbered soft interrupt vectors because each run starts at
>> the lowest bit. If the loop terminates then the already processed bits can
>> be raised again before the next loop starts. If these lower bits run into
>> the termination again, then a re-raise might starve the higher bits forever.
>>
>> To prevent this, store the leftovers of the previous run in the upper 16
>> bit of the local softirq_pending storage and ensure that these are
>> processed before any newly raised bits are handled.
>>
>> Signed-off-by: Thomas Gleixner <[email protected]>
>> ---
>> kernel/softirq.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++--------
>> 1 file changed, 50 insertions(+), 8 deletions(-)
>>
>> --- a/kernel/softirq.c
>> +++ b/kernel/softirq.c
>> @@ -259,11 +259,23 @@ static inline bool __softirq_needs_break
>> return need_resched() || __softirq_timeout(tbreak);
>> }
>>
>> +/*
>> + * local_softirq_pending() is split into two 16 bit words. The low word
>> + * contains the bits set by raise_softirq(), the high word contains pending
>> + * bits which have not been processed in an early terminated run. This is
>> + * required to prevent starvation of the higher numbered softirqs.
>> + */
>> +#define SIRQ_PREV_SHIFT 16
>
> Note that in the case of x86, irq_start.__softirq_pending is a u16.
>
> The origin is there: 9aee5f8a7e30330d0a8f4c626dc924ca5590aba5
> "x86/irq: Demote irq_cpustat_t::__softirq_pending to u16"

Bah, crap. I knew that and wanted to fix it up but then forgot.

Thanks for reminding me of my slowly upcoming alzheimer!

2020-09-25 22:44:26

by Thomas Gleixner

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Fri, Sep 25 2020 at 02:42, Frederic Weisbecker wrote:

> On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
>> Subject: softirq; Prevent starvation of higher softirq vectors
> [...]
>> + /*
>> + * Word swap pending to move the not yet handled bits of the previous
>> + * run first and then clear the duplicates in the newly raised ones.
>> + */
>> + swahw32s(&cur_pending);
>> + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);
>> +
>> for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
>> int prev_count;
>>
>> + vec_nr &= SIRQ_VECTOR_MASK;
>
> Shouldn't NR_SOFTIRQS above protect from that?

It does, but that's wrong. The bitmap size in that for_each() loop must
obviously be SIRQ_PREV_SHIFT + NR_SOFTIRQS for this to work.
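
(For illustration only: a standalone userspace sketch of that word swap with
the corrected bitmap size. swahw32s() is reimplemented locally with the same
semantics, and SIRQ_VECTOR_MASK is assumed to be SIRQ_PREV_SHIFT - 1, since
its definition is not visible in the quoted hunks.)

#include <stdio.h>
#include <stdint.h>

#define NR_SOFTIRQS             10
#define SIRQ_PREV_SHIFT         16
#define SIRQ_VECTOR_MASK        (SIRQ_PREV_SHIFT - 1)

/* same semantics as the kernel helper: swap the two 16 bit halfwords */
static void swahw32s(uint32_t *p)
{
        *p = (*p << 16) | (*p >> 16);
}

int main(void)
{
        /* leftover vectors 2 and 4 in the high word, newly raised 0 and 2 in the low word */
        uint32_t cur_pending = ((1u << 2 | 1u << 4) << SIRQ_PREV_SHIFT) | (1u << 0 | 1u << 2);
        uint32_t pending;

        /* move the not yet handled bits first, then drop the duplicate vector 2 */
        swahw32s(&cur_pending);
        pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);

        printf("processing order:");
        for (int bit = 0; bit < SIRQ_PREV_SHIFT + NR_SOFTIRQS; bit++)
                if (pending & (1u << bit))
                        printf(" %d", bit & SIRQ_VECTOR_MASK);
        printf("\n");           /* prints: processing order: 2 4 0 */
        return 0;
}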

>> + } else {
>> + /*
>> + * Retain the unprocessed bits and swap @cur_pending back
>> + * into normal ordering
>> + */
>> + cur_pending = (u32)pending;
>> + swahw32s(&cur_pending);
>> + /*
>> + * If the previous bits are done move the low word of
>> + * @pending into the high word so it's processed first.
>> + */
>> + if (!(cur_pending & SIRQ_PREV_MASK))
>> + cur_pending <<= SIRQ_PREV_SHIFT;
>
> If the previous bits are done and there is no timeout, should
> > we consider restarting the loop?

We only enter this code path if there was a timeout. Otherwise pending
would be 0.

Thanks,

tglx

2020-09-26 02:04:12

by jun qian

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

Thomas Gleixner <[email protected]> wrote on Thu, Sep 24, 2020 at 11:37 PM:
>
> On Tue, Sep 15 2020 at 19:56, qianjun kernel wrote:
> >
> > +#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
> >
> > +/*
> > + * The pending_next_bit is recorded for the next processing order when
> > + * the loop is broken. This per cpu variable is to solve the following
> > + * scenarios:
> > + * Assume bit 0 and 1 are pending when the processing starts. Now it
> > + * breaks out after bit 0 has been handled and stores back bit 1 as
> > + * pending. Before ksoftirqd runs bit 0 gets raised again. ksoftirqd
> > + * runs and handles bit 0, which takes more than the timeout. As a
> > + * result the bit 0 processing can starve all other softirqs.
> > + *
> > + * so we need the pending_next_bit to record the next process order.
> > + */
> > +DEFINE_PER_CPU(u32, pending_next_bit);
>
> static if at all.
>
> > +
> > asmlinkage __visible void __softirq_entry __do_softirq(void)
> > {
> > u64 start = sched_clock();
> > @@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> > unsigned int max_restart = MAX_SOFTIRQ_RESTART;
> > struct softirq_action *h;
> > unsigned long pending;
> > + unsigned long pending_left, pending_again;
> > unsigned int vec_nr;
> > bool in_hardirq;
> > + int next_bit;
> > + unsigned long flags;
> >
> > /*
> > * Mask out PF_MEMALLOC as the current task context is borrowed for the
> > @@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
> >
> > local_irq_enable();
> >
> > - for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> > - int prev_count;
> > -
> > - __clear_bit(vec_nr, &pending);
> > -
> > - h = softirq_vec + vec_nr;
> > -
> > - prev_count = preempt_count();
> > -
> > - kstat_incr_softirqs_this_cpu(vec_nr);
> > + /*
> > + * pending_left means that the left bits unhandled when the loop is
> > + * broken without finishing the vectors. These bits will be handled
> > + * first in the next time. pending_again means that the new bits is
> > + * generated in the other time. These bits should be handled after
> > + * the pending_left bits have been handled.
> > + *
> > + * For example
> > + * If the pending bits is 1101010110, and the loop is broken after
> > + * the bit4 is handled. Then, the pending_next_bit will be 5, and
> > + * the pending_left is 1101000000, the pending_again is 000000110.
> > + */
>
> If you need such a comment to explain the meaning of your variables then
> > you did something fundamentally wrong.
>
> > + next_bit = __this_cpu_read(pending_next_bit);
> > + pending_left = pending &
> > + (SOFTIRQ_PENDING_MASK << next_bit);
> > + pending_again = pending &
> > + (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
> > +
> > + while (pending_left || pending_again) {
> > + if (pending_left) {
> > + pending = pending_left;
> > + pending_left = 0;
> > + } else if (pending_again) {
> > + pending = pending_again;
> > + pending_again = 0;
> > + } else
> > + break;
>
> Aside from the missing brackets, how is that 'else' path ever going to be
> reached?
>
> But TBH that whole patch is a completely unreviewable maze.
>
> This can be done without all this pending, pending_left, pending_again,
> pending_next_bit, next_bit convolution. It's inconsistent anyway:
>
> __do_softirq()
>
> pending = 0x25;
> next = 0;
>
> for (...)
> break after bit 0
>
> ==> pending == 0x24
>
> ==> next = 2
>
> now on the next invocation
>
> pending = 0x35;
> next = 2;
>
> So the processing order is 2, 4, 5, 0
>
> and there is nothing you can do about that with that approach.
>
> But the whole point is to ensure that the not yet processed bits are
> processed first.
>
> Find attached an updated series based on the original one from Peter
> with the authorship preserved, intact SOB chains and proper changelogs.
>
> The last one is new and addresses the starvation issue in a readable
> way.
>
> All of this is again completely untested.
>
> Thanks,
>
> tglx
>

I will fix it and test it. After testing, I will send the patch again.

thanks

2020-09-26 12:23:58

by Frederic Weisbecker

Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Sat, Sep 26, 2020 at 12:42:25AM +0200, Thomas Gleixner wrote:
> On Fri, Sep 25 2020 at 02:42, Frederic Weisbecker wrote:
>
> > On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> >> Subject: softirq; Prevent starvation of higher softirq vectors
> > [...]
> >> + /*
> >> + * Word swap pending to move the not yet handled bits of the previous
> >> + * run first and then clear the duplicates in the newly raised ones.
> >> + */
> >> + swahw32s(&cur_pending);
> >> + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);
> >> +
> >> for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> >> int prev_count;
> >>
> >> + vec_nr &= SIRQ_VECTOR_MASK;
> >
> > Shouldn't NR_SOFTIRQS above protect from that?
>
> It does, but that's wrong. The bitmap size in that for_each() loop must
> obviously be SIRQ_PREV_SHIFT + NR_SOFTIRQS for this to work.

Ah! I see, I thought you were ignoring the high bits on
purpose, hence my later questions about pending.

>
> >> + } else {
> >> + /*
> >> + * Retain the unprocessed bits and swap @cur_pending back
> >> + * into normal ordering
> >> + */
> >> + cur_pending = (u32)pending;
> >> + swahw32s(&cur_pending);
> >> + /*
> >> + * If the previous bits are done move the low word of
> >> + * @pending into the high word so it's processed first.
> >> + */
> >> + if (!(cur_pending & SIRQ_PREV_MASK))
> >> + cur_pending <<= SIRQ_PREV_SHIFT;
> >
> > If the previous bits are done and there is no timeout, should
> > > we consider restarting the loop?
>
> We only enter this code path if there was a timeout. Otherwise pending
> would be 0.

Right, with SIRQ_PREV_SHIFT + NR_SOFTIRQS the whole thing now makes sense!

Thanks!

2020-09-27 01:31:40

by Chen, Rong A

Subject: [softirq] 56c21abbe6: will-it-scale.per_process_ops -9.1% regression

Greetings,

FYI, we noticed a -9.1% regression of will-it-scale.per_process_ops due to commit:


commit: 56c21abbe67ac004dcea51b34fe43e7542563967 ("[PATCH V7 4/4] softirq: Allow early break the softirq processing loop")
url: https://github.com/0day-ci/linux/commits/qianjun-kernel-gmail-com/Softirq-avoid-large-sched-delay-from-the-pending-softirqs/20200915-202119
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git fc4f28bb3daf3265d6bc5f73b497306985bb23ab

in testcase: will-it-scale
on test machine: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory
with following parameters:

nr_task: 50%
mode: process
test: unlink2
cpufreq_governor: performance
ucode: 0x5002f01

test-description: Will It Scale takes a testcase and runs it from 1 through to n parallel copies to see if the testcase will scale. It builds both a process and threads based test in order to see any differences between the two.
test-url: https://github.com/antonblanchard/will-it-scale



If you fix the issue, kindly add the following tag
Reported-by: kernel test robot <[email protected]>


Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml

=========================================================================================
compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase/ucode:
gcc-9/performance/x86_64-rhel-8.3/process/50%/debian-10.4-x86_64-20200603.cgz/lkp-csl-2ap3/unlink2/will-it-scale/0x5002f01

commit:
c5efd3f36b ("softirq: Rewrite softirq processing loop")
56c21abbe6 ("softirq: Allow early break the softirq processing loop")

c5efd3f36ba40e32 56c21abbe67ac004dcea51b34fe
---------------- ---------------------------
%stddev %change %stddev
\ | \
6755 -9.1% 6140 ± 3% will-it-scale.per_process_ops
648583 -9.1% 589512 ± 3% will-it-scale.workload
3.32 ± 16% +1.8 5.10 ± 11% mpstat.cpu.all.soft%
38.54 ± 2% +4.9% 40.41 boot-time.boot
6507 ± 3% +5.6% 6874 boot-time.idle
118524 ± 6% -15.7% 99945 ± 4% numa-meminfo.node2.SUnreclaim
154132 ± 12% -17.5% 127224 ± 5% numa-meminfo.node2.Slab
13238 ± 14% -27.7% 9571 ± 4% numa-meminfo.node3.Mapped
48.25 +3.6% 50.00 vmstat.cpu.id
50.75 -3.4% 49.00 vmstat.cpu.sy
4195 -39.2% 2549 vmstat.system.cs
38678 -4.2% 37037 proc-vmstat.nr_slab_reclaimable
119432 -8.4% 109385 proc-vmstat.nr_slab_unreclaimable
8384131 -56.1% 3678508 ± 4% proc-vmstat.numa_hit
8290771 -56.8% 3585163 ± 5% proc-vmstat.numa_local
37833465 -56.0% 16665578 ± 5% proc-vmstat.pgalloc_normal
37858309 -55.9% 16689966 ± 5% proc-vmstat.pgfree
63272 ± 2% +4.5% 66097 proc-vmstat.pgreuse
2026017 ± 3% -56.3% 885611 ± 6% numa-numastat.node0.local_node
2044640 ± 2% -55.5% 908866 ± 7% numa-numastat.node0.numa_hit
2062943 -55.6% 916257 ± 13% numa-numastat.node1.local_node
2081620 -55.2% 931844 ± 12% numa-numastat.node1.numa_hit
2050083 -61.1% 797809 ± 14% numa-numastat.node2.local_node
2078050 -60.1% 828876 ± 14% numa-numastat.node2.numa_hit
2118742 ± 2% -54.8% 958560 ± 7% numa-numastat.node3.local_node
2146693 ± 2% -54.3% 981924 ± 5% numa-numastat.node3.numa_hit
1501571 ± 4% -35.5% 969253 ± 15% numa-vmstat.node0.numa_hit
1428280 ± 4% -37.6% 891503 ± 19% numa-vmstat.node0.numa_local
1469916 ± 5% -36.2% 938480 ± 6% numa-vmstat.node1.numa_hit
1359600 ± 6% -36.3% 865689 ± 7% numa-vmstat.node1.numa_local
110316 ± 12% -34.0% 72791 ± 25% numa-vmstat.node1.numa_other
29574 ± 6% -15.6% 24956 ± 4% numa-vmstat.node2.nr_slab_unreclaimable
1532952 ± 5% -45.4% 837688 ± 11% numa-vmstat.node2.numa_hit
1430484 ± 7% -48.8% 732210 ± 15% numa-vmstat.node2.numa_local
3308 ± 11% -28.8% 2354 ± 4% numa-vmstat.node3.nr_mapped
1512439 ± 6% -39.8% 910203 ± 6% numa-vmstat.node3.numa_hit
1427122 ± 5% -44.3% 794761 ± 8% numa-vmstat.node3.numa_local
8843 -25.1% 6619 ± 11% slabinfo.eventpoll_pwq.active_objs
8843 -25.1% 6619 ± 11% slabinfo.eventpoll_pwq.num_objs
8974 -10.2% 8058 ± 4% slabinfo.files_cache.active_objs
8974 -10.2% 8058 ± 4% slabinfo.files_cache.num_objs
211888 -15.2% 179708 slabinfo.filp.active_objs
3320 -13.8% 2861 slabinfo.filp.active_slabs
212563 -13.8% 183174 slabinfo.filp.num_objs
3320 -13.8% 2861 slabinfo.filp.num_slabs
82554 -19.7% 66310 slabinfo.kmalloc-512.active_objs
1296 -19.4% 1045 slabinfo.kmalloc-512.active_slabs
83019 -19.4% 66940 slabinfo.kmalloc-512.num_objs
1296 -19.4% 1045 slabinfo.kmalloc-512.num_slabs
18192 -22.3% 14144 ± 7% slabinfo.pde_opener.active_objs
18192 -22.3% 14144 ± 7% slabinfo.pde_opener.num_objs
171254 -16.6% 142779 ± 2% slabinfo.shmem_inode_cache.active_objs
3728 -12.3% 3269 ± 2% slabinfo.shmem_inode_cache.active_slabs
171549 -12.3% 150412 ± 2% slabinfo.shmem_inode_cache.num_objs
3728 -12.3% 3269 ± 2% slabinfo.shmem_inode_cache.num_slabs
21010 +16.7% 24526 slabinfo.vmap_area.active_objs
21021 +16.7% 24541 slabinfo.vmap_area.num_objs
0.91 ± 22% +0.5 1.45 ± 28% perf-profile.calltrace.cycles-pp.__memcg_kmem_uncharge.drain_obj_stock.refill_obj_stock.kmem_cache_free.rcu_do_batch
0.91 ± 23% +0.5 1.45 ± 28% perf-profile.calltrace.cycles-pp.page_counter_uncharge.__memcg_kmem_uncharge.drain_obj_stock.refill_obj_stock.kmem_cache_free
0.88 ± 22% +0.5 1.43 ± 27% perf-profile.calltrace.cycles-pp.page_counter_cancel.page_counter_uncharge.__memcg_kmem_uncharge.drain_obj_stock.refill_obj_stock
0.78 ± 6% +0.7 1.47 ± 19% perf-profile.calltrace.cycles-pp.page_counter_cancel.page_counter_uncharge.drain_obj_stock.refill_obj_stock.kmem_cache_free
0.78 ± 6% +0.7 1.48 ± 18% perf-profile.calltrace.cycles-pp.page_counter_uncharge.drain_obj_stock.refill_obj_stock.kmem_cache_free.rcu_do_batch
2.58 ± 9% +1.0 3.55 ± 17% perf-profile.calltrace.cycles-pp.drain_obj_stock.refill_obj_stock.kmem_cache_free.rcu_do_batch.rcu_core
2.85 ± 5% +1.2 4.04 ± 16% perf-profile.calltrace.cycles-pp.refill_obj_stock.kmem_cache_free.rcu_do_batch.rcu_core.__softirqentry_text_start
0.00 +3.5 3.54 ± 13% perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath._raw_spin_lock.evict
0.00 +3.5 3.55 ± 13% perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath._raw_spin_lock.evict.do_unlinkat
0.00 +3.6 3.59 ± 13% perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath._raw_spin_lock.inode_sb_list_add
0.00 +3.6 3.59 ± 13% perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath._raw_spin_lock.inode_sb_list_add.new_inode
1.06 ± 23% +5.7 6.74 ± 14% perf-profile.calltrace.cycles-pp.kmem_cache_free.rcu_do_batch.rcu_core.__softirqentry_text_start.asm_call_on_stack
1.08 ± 23% +5.7 6.81 ± 14% perf-profile.calltrace.cycles-pp.rcu_do_batch.rcu_core.__softirqentry_text_start.asm_call_on_stack.do_softirq_own_stack
1.08 ± 23% +5.7 6.82 ± 14% perf-profile.calltrace.cycles-pp.rcu_core.__softirqentry_text_start.asm_call_on_stack.do_softirq_own_stack.irq_exit_rcu
1.08 ± 23% +5.7 6.82 ± 14% perf-profile.calltrace.cycles-pp.asm_call_on_stack.do_softirq_own_stack.irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt
1.08 ± 23% +5.7 6.82 ± 14% perf-profile.calltrace.cycles-pp.__softirqentry_text_start.asm_call_on_stack.do_softirq_own_stack.irq_exit_rcu.sysvec_apic_timer_interrupt
0.00 +6.8 6.82 ± 14% perf-profile.calltrace.cycles-pp.do_softirq_own_stack.irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath
0.00 +6.8 6.83 ± 14% perf-profile.calltrace.cycles-pp.irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.native_queued_spin_lock_slowpath._raw_spin_lock
0.60 ± 27% -0.3 0.33 ± 11% perf-profile.children.cycles-pp.kmem_cache_alloc
0.29 ± 21% -0.1 0.16 ± 9% perf-profile.children.cycles-pp.shmem_alloc_inode
0.32 ± 21% -0.1 0.23 ± 9% perf-profile.children.cycles-pp.alloc_empty_file
0.32 ± 21% -0.1 0.23 ± 9% perf-profile.children.cycles-pp.__alloc_file
0.18 ± 10% -0.1 0.10 ± 8% perf-profile.children.cycles-pp.obj_cgroup_charge
0.24 ± 25% -0.1 0.16 ± 11% perf-profile.children.cycles-pp.d_alloc_parallel
0.21 ± 29% -0.1 0.14 ± 13% perf-profile.children.cycles-pp.d_alloc
0.17 ± 36% -0.1 0.10 ± 14% perf-profile.children.cycles-pp.__d_alloc
0.13 ± 14% -0.1 0.07 ± 7% perf-profile.children.cycles-pp.page_counter_try_charge
0.14 ± 12% -0.1 0.08 ± 8% perf-profile.children.cycles-pp.__memcg_kmem_charge
0.16 ± 4% -0.0 0.12 ± 14% perf-profile.children.cycles-pp.__mod_memcg_lruvec_state
0.18 ± 26% +0.1 0.32 ± 20% perf-profile.children.cycles-pp.start_kernel
2.91 ± 9% +0.9 3.77 ± 18% perf-profile.children.cycles-pp.drain_obj_stock
3.23 ± 5% +1.1 4.30 ± 17% perf-profile.children.cycles-pp.refill_obj_stock
4.45 ± 21% +2.8 7.22 ± 14% perf-profile.children.cycles-pp.rcu_do_batch
4.45 ± 21% +2.8 7.22 ± 14% perf-profile.children.cycles-pp.rcu_core
4.45 ± 21% +2.8 7.23 ± 14% perf-profile.children.cycles-pp.__softirqentry_text_start
4.36 ± 21% +2.8 7.16 ± 14% perf-profile.children.cycles-pp.kmem_cache_free
1.58 ± 19% +5.6 7.21 ± 14% perf-profile.children.cycles-pp.irq_exit_rcu
1.58 ± 19% +5.6 7.20 ± 14% perf-profile.children.cycles-pp.do_softirq_own_stack
1.94 ± 17% +5.6 7.58 ± 13% perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
1.92 ± 17% +5.6 7.55 ± 13% perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
1.91 ± 17% +5.6 7.55 ± 13% perf-profile.children.cycles-pp.asm_call_on_stack
0.20 ± 77% -0.1 0.08 ± 10% perf-profile.self.cycles-pp.kmem_cache_alloc
0.11 ± 14% -0.1 0.06 ± 11% perf-profile.self.cycles-pp.page_counter_try_charge
0.11 ± 7% -0.0 0.08 ± 19% perf-profile.self.cycles-pp.__mod_memcg_state
0.34 ± 52% +0.3 0.66 ± 22% perf-profile.self.cycles-pp.drain_obj_stock
1.85 ± 8% +0.8 2.61 ± 18% perf-profile.self.cycles-pp.page_counter_cancel
1.00 ±110% +1.7 2.73 ± 48% perf-profile.self.cycles-pp.kmem_cache_free
7.77e+09 -6.4% 7.269e+09 perf-stat.i.branch-instructions
27098166 -8.5% 24797110 perf-stat.i.branch-misses
1.282e+08 ± 7% -14.5% 1.096e+08 ± 5% perf-stat.i.cache-misses
2.269e+08 ± 4% -11.2% 2.015e+08 ± 4% perf-stat.i.cache-references
4176 -39.8% 2514 perf-stat.i.context-switches
9.13 +4.6% 9.54 ± 2% perf-stat.i.cpi
3.066e+11 -2.5% 2.989e+11 perf-stat.i.cpu-cycles
438.98 -55.5% 195.51 perf-stat.i.cpu-migrations
1192843 ± 7% -21.3% 938565 ± 10% perf-stat.i.dTLB-load-misses
8.892e+09 -6.9% 8.277e+09 perf-stat.i.dTLB-loads
0.01 ± 2% -0.0 0.00 perf-stat.i.dTLB-store-miss-rate%
151589 ± 2% -37.1% 95330 ± 3% perf-stat.i.dTLB-store-misses
2.138e+09 -9.0% 1.946e+09 ± 3% perf-stat.i.dTLB-stores
15422532 ± 2% -7.6% 14254140 ± 3% perf-stat.i.iTLB-load-misses
4135008 ± 2% -11.1% 3674504 perf-stat.i.iTLB-loads
3.356e+10 -6.7% 3.13e+10 perf-stat.i.instructions
0.11 ± 2% -4.5% 0.11 ± 2% perf-stat.i.ipc
1.60 -2.5% 1.56 perf-stat.i.metric.GHz
0.83 ± 2% -35.6% 0.54 ± 2% perf-stat.i.metric.K/sec
99.36 -7.0% 92.37 perf-stat.i.metric.M/sec
89.17 +3.7 92.92 perf-stat.i.node-load-miss-rate%
3005728 ± 3% -44.0% 1684284 ± 6% perf-stat.i.node-loads
83.60 +10.4 94.04 perf-stat.i.node-store-miss-rate%
15867919 ± 3% -7.9% 14616711 ± 2% perf-stat.i.node-store-misses
3099149 ± 2% -70.5% 915605 ± 8% perf-stat.i.node-stores
9.14 +4.5% 9.55 ± 2% perf-stat.overall.cpi
0.01 ± 3% -0.0 0.00 perf-stat.overall.dTLB-store-miss-rate%
0.11 -4.3% 0.10 ± 2% perf-stat.overall.ipc
89.11 +3.8 92.91 perf-stat.overall.node-load-miss-rate%
83.54 +10.6 94.09 perf-stat.overall.node-store-miss-rate%
15568616 +2.6% 15978455 perf-stat.overall.path-length
7.743e+09 -6.4% 7.245e+09 perf-stat.ps.branch-instructions
26987276 -8.5% 24693465 perf-stat.ps.branch-misses
1.278e+08 ± 7% -14.5% 1.093e+08 ± 5% perf-stat.ps.cache-misses
2.261e+08 ± 4% -11.2% 2.008e+08 ± 4% perf-stat.ps.cache-references
4146 -39.8% 2495 perf-stat.ps.context-switches
3.056e+11 -2.5% 2.979e+11 perf-stat.ps.cpu-cycles
435.41 -55.5% 193.75 perf-stat.ps.cpu-migrations
1188975 ± 7% -21.2% 937304 ± 10% perf-stat.ps.dTLB-load-misses
8.86e+09 -6.9% 8.249e+09 perf-stat.ps.dTLB-loads
151390 ± 2% -37.1% 95183 ± 4% perf-stat.ps.dTLB-store-misses
2.131e+09 -9.0% 1.939e+09 ± 3% perf-stat.ps.dTLB-stores
15372733 ± 2% -7.6% 14205375 ± 3% perf-stat.ps.iTLB-load-misses
4119220 ± 2% -11.1% 3661007 perf-stat.ps.iTLB-loads
3.344e+10 -6.7% 3.12e+10 perf-stat.ps.instructions
3008402 ± 3% -44.1% 1681041 ± 6% perf-stat.ps.node-loads
15811834 ± 3% -7.9% 14568851 ± 2% perf-stat.ps.node-store-misses
3114607 ± 2% -70.6% 916394 ± 8% perf-stat.ps.node-stores
1.01e+13 -6.8% 9.414e+12 perf-stat.total.instructions
160152 ± 49% -98.5% 2403 ±173% sched_debug.cfs_rq:/.MIN_vruntime.avg
3681748 ± 19% -89.9% 371526 ±173% sched_debug.cfs_rq:/.MIN_vruntime.max
689554 ± 27% -95.7% 29687 ±173% sched_debug.cfs_rq:/.MIN_vruntime.stddev
76842 +56.2% 119995 ± 10% sched_debug.cfs_rq:/.exec_clock.max
71761 -91.6% 6024 ± 93% sched_debug.cfs_rq:/.exec_clock.min
824.90 ± 17% +3417.5% 29015 ± 24% sched_debug.cfs_rq:/.exec_clock.stddev
26728 ± 37% -71.8% 7545 ± 18% sched_debug.cfs_rq:/.load.avg
537605 ± 39% -73.0% 145015 ±154% sched_debug.cfs_rq:/.load.max
96450 ± 28% -85.1% 14357 ±115% sched_debug.cfs_rq:/.load.stddev
23.13 ± 10% -54.3% 10.56 ± 13% sched_debug.cfs_rq:/.load_avg.avg
44.84 ± 9% -46.1% 24.17 ± 24% sched_debug.cfs_rq:/.load_avg.stddev
160152 ± 49% -98.5% 2403 ±173% sched_debug.cfs_rq:/.max_vruntime.avg
3681748 ± 19% -89.9% 371526 ±173% sched_debug.cfs_rq:/.max_vruntime.max
689554 ± 27% -95.7% 29687 ±173% sched_debug.cfs_rq:/.max_vruntime.stddev
7548773 +63.1% 12309922 ± 10% sched_debug.cfs_rq:/.min_vruntime.max
6982694 -90.8% 642714 ± 89% sched_debug.cfs_rq:/.min_vruntime.min
82109 ± 16% +3517.1% 2970025 ± 24% sched_debug.cfs_rq:/.min_vruntime.stddev
0.46 ± 3% +35.3% 0.62 ± 4% sched_debug.cfs_rq:/.nr_running.avg
1.46 ± 9% -25.7% 1.08 ± 13% sched_debug.cfs_rq:/.nr_running.max
0.49 ± 2% -19.7% 0.39 ± 5% sched_debug.cfs_rq:/.nr_running.stddev
318.90 ± 19% -95.2% 15.34 ± 17% sched_debug.cfs_rq:/.nr_spread_over.avg
367.83 ± 17% -90.7% 34.06 ± 6% sched_debug.cfs_rq:/.nr_spread_over.max
273.04 ± 22% -99.2% 2.12 ± 67% sched_debug.cfs_rq:/.nr_spread_over.min
18.52 ± 4% -62.2% 6.99 ± 4% sched_debug.cfs_rq:/.nr_spread_over.stddev
476.90 +35.7% 647.33 ± 3% sched_debug.cfs_rq:/.runnable_avg.avg
477.76 -15.0% 406.02 ± 5% sched_debug.cfs_rq:/.runnable_avg.stddev
33460 ±280% +14366.1% 4840441 ± 23% sched_debug.cfs_rq:/.spread0.avg
278930 ± 43% +3632.2% 10410349 ± 18% sched_debug.cfs_rq:/.spread0.max
-287806 +336.7% -1256871 sched_debug.cfs_rq:/.spread0.min
82151 ± 17% +3515.3% 2970042 ± 24% sched_debug.cfs_rq:/.spread0.stddev
459.01 +40.6% 645.33 ± 3% sched_debug.cfs_rq:/.util_avg.avg
460.21 -12.1% 404.72 ± 5% sched_debug.cfs_rq:/.util_avg.stddev
192.14 +36.1% 261.56 ± 5% sched_debug.cfs_rq:/.util_est_enqueued.avg
211.12 -14.6% 180.28 ± 4% sched_debug.cfs_rq:/.util_est_enqueued.stddev
724.27 ± 4% +371.0% 3411 ± 13% sched_debug.cpu.clock_task.stddev
1.46 ± 9% -25.7% 1.08 ± 13% sched_debug.cpu.nr_running.max
4979 -26.8% 3642 ± 4% sched_debug.cpu.nr_switches.avg
3162 ± 4% -52.6% 1498 ± 12% sched_debug.cpu.nr_switches.min
2160 ± 4% +51.6% 3275 ± 17% sched_debug.cpu.nr_switches.stddev
3319 -41.7% 1936 ± 8% sched_debug.cpu.sched_count.avg
2269 -72.8% 617.17 ± 12% sched_debug.cpu.sched_count.min
1679 ± 6% +64.9% 2767 ± 21% sched_debug.cpu.sched_count.stddev
1109 -36.5% 704.97 ± 8% sched_debug.cpu.sched_goidle.avg
5848 ± 10% +122.7% 13026 ± 35% sched_debug.cpu.sched_goidle.max
596.08 ± 2% -95.1% 28.95 ± 57% sched_debug.cpu.sched_goidle.min
837.36 ± 6% +64.6% 1378 ± 21% sched_debug.cpu.sched_goidle.stddev
1477 -38.5% 908.81 ± 8% sched_debug.cpu.ttwu_count.avg
5591 ± 19% +156.9% 14361 ± 32% sched_debug.cpu.ttwu_count.max
977.79 -76.5% 229.52 ± 13% sched_debug.cpu.ttwu_count.min
801.84 ± 5% +82.3% 1461 ± 19% sched_debug.cpu.ttwu_count.stddev
1099 -50.4% 545.27 ± 9% sched_debug.cpu.ttwu_local.avg
1816 ± 3% +70.5% 3096 ± 11% sched_debug.cpu.ttwu_local.max
906.71 -75.8% 219.62 ± 12% sched_debug.cpu.ttwu_local.min
127.09 ± 4% +284.7% 488.96 ± 9% sched_debug.cpu.ttwu_local.stddev
43524 -48.7% 22324 ± 24% softirqs.CPU0.RCU
23136 +60.4% 37100 ± 8% softirqs.CPU0.SCHED
46628 +61.2% 75158 ± 22% softirqs.CPU100.RCU
45964 +72.7% 79378 ± 19% softirqs.CPU105.RCU
46416 +80.1% 83607 ± 25% softirqs.CPU106.RCU
46277 +88.9% 87437 ± 27% softirqs.CPU107.RCU
46217 +59.7% 73815 ± 24% softirqs.CPU108.RCU
46178 +75.1% 80839 ± 22% softirqs.CPU109.RCU
46619 +59.8% 74475 ± 15% softirqs.CPU110.RCU
46742 +75.2% 81873 ± 25% softirqs.CPU111.RCU
19962 ± 3% +17.6% 23468 ± 9% softirqs.CPU118.SCHED
19808 ± 3% +22.1% 24179 ± 12% softirqs.CPU119.SCHED
46640 +58.2% 73798 ± 31% softirqs.CPU120.RCU
19967 ± 2% +31.0% 26165 ± 15% softirqs.CPU121.SCHED
5284 ±123% -92.5% 397.50 ± 64% softirqs.CPU13.NET_RX
46584 +30.1% 60592 ± 15% softirqs.CPU133.RCU
46084 +60.5% 73956 ± 15% softirqs.CPU134.RCU
46262 +81.6% 83990 ± 23% softirqs.CPU136.RCU
46238 +65.8% 76651 ± 32% softirqs.CPU137.RCU
46338 +82.3% 84453 ± 18% softirqs.CPU138.RCU
46691 +91.9% 89596 ± 8% softirqs.CPU139.RCU
19859 ± 3% -25.2% 14846 ± 14% softirqs.CPU139.SCHED
46314 +31.9% 61070 ± 19% softirqs.CPU14.RCU
46095 +77.4% 81783 ± 16% softirqs.CPU140.RCU
46195 +48.9% 68768 ± 8% softirqs.CPU141.RCU
45915 +64.7% 75601 ± 24% softirqs.CPU142.RCU
46769 +53.8% 71950 ± 15% softirqs.CPU144.RCU
19920 ± 3% +30.9% 26067 ± 10% softirqs.CPU147.SCHED
19931 ± 2% +25.8% 25068 ± 11% softirqs.CPU150.SCHED
20194 ± 2% +16.3% 23479 ± 10% softirqs.CPU154.SCHED
19988 ± 2% +19.5% 23881 ± 9% softirqs.CPU156.SCHED
46965 +40.1% 65803 ± 25% softirqs.CPU16.RCU
46437 +34.6% 62487 ± 15% softirqs.CPU160.RCU
46825 +25.2% 58640 ± 12% softirqs.CPU161.RCU
20180 ± 3% +29.6% 26159 ± 16% softirqs.CPU162.SCHED
20142 ± 3% +33.3% 26851 ± 20% softirqs.CPU164.SCHED
46307 +33.2% 61681 ± 14% softirqs.CPU168.RCU
19982 ± 2% +34.5% 26871 ± 6% softirqs.CPU169.SCHED
20103 ± 2% +28.7% 25870 ± 16% softirqs.CPU170.SCHED
20275 ± 2% +24.2% 25181 ± 4% softirqs.CPU171.SCHED
20060 ± 3% +31.1% 26300 ± 10% softirqs.CPU176.SCHED
20049 ± 2% +26.3% 25326 ± 12% softirqs.CPU178.SCHED
46197 +42.5% 65832 ± 16% softirqs.CPU179.RCU
20152 ± 2% +49.5% 30130 ± 19% softirqs.CPU180.SCHED
20005 ± 3% +47.3% 29476 ± 18% softirqs.CPU182.SCHED
20065 ± 2% +19.3% 23931 ± 5% softirqs.CPU184.SCHED
20132 ± 2% +36.2% 27411 ± 24% softirqs.CPU186.SCHED
20106 ± 2% +32.5% 26640 ± 11% softirqs.CPU187.SCHED
20208 ± 3% +23.9% 25047 ± 20% softirqs.CPU188.SCHED
46604 +57.9% 73587 ± 21% softirqs.CPU19.RCU
45613 +89.5% 86446 ± 12% softirqs.CPU191.RCU
20084 ± 2% -22.5% 15565 ± 18% softirqs.CPU191.SCHED
46997 +37.2% 64467 ± 17% softirqs.CPU2.RCU
48025 ± 5% +67.9% 80646 ± 24% softirqs.CPU20.RCU
46556 +55.9% 72558 ± 19% softirqs.CPU21.RCU
46736 +67.6% 78318 ± 9% softirqs.CPU22.RCU
46587 +70.6% 79488 ± 15% softirqs.CPU23.RCU
46733 +85.3% 86598 ± 15% softirqs.CPU25.RCU
46749 +90.7% 89142 ± 20% softirqs.CPU28.RCU
46931 +71.3% 80401 ± 18% softirqs.CPU30.RCU
46684 +63.4% 76302 ± 21% softirqs.CPU31.RCU
47299 +71.3% 81014 ± 33% softirqs.CPU32.RCU
47053 +59.0% 74829 ± 12% softirqs.CPU37.RCU
19706 ± 2% +35.7% 26736 ± 10% softirqs.CPU43.SCHED
19748 ± 2% +21.2% 23937 ± 10% softirqs.CPU44.SCHED
47043 +42.6% 67077 ± 7% softirqs.CPU45.RCU
47237 +34.3% 63459 ± 17% softirqs.CPU48.RCU
46703 +65.3% 77189 ± 20% softirqs.CPU49.RCU
46930 +79.6% 84283 ± 10% softirqs.CPU51.RCU
19796 ± 3% -17.3% 16381 ± 7% softirqs.CPU51.SCHED
46865 +85.6% 86976 ± 27% softirqs.CPU52.RCU
47225 +76.6% 83421 ± 10% softirqs.CPU54.RCU
46585 +79.2% 83489 ± 22% softirqs.CPU55.RCU
46900 +73.8% 81535 ± 24% softirqs.CPU57.RCU
46909 +67.5% 78557 ± 12% softirqs.CPU58.RCU
46686 +55.6% 72626 ± 27% softirqs.CPU59.RCU
46598 +73.5% 80831 ± 13% softirqs.CPU60.RCU
46447 +67.0% 77581 ± 15% softirqs.CPU61.RCU
46565 +95.4% 90998 ± 13% softirqs.CPU62.RCU
46849 +55.3% 72757 ± 12% softirqs.CPU64.RCU
47140 +63.2% 76912 ± 10% softirqs.CPU65.RCU
46994 +98.4% 93224 ± 21% softirqs.CPU66.RCU
47098 +93.8% 91289 ± 21% softirqs.CPU68.RCU
19884 ± 2% -25.9% 14735 ± 36% softirqs.CPU68.SCHED
47227 +47.3% 69543 ± 24% softirqs.CPU70.RCU
47037 +55.1% 72953 ± 12% softirqs.CPU72.RCU
46823 +92.5% 90158 ± 6% softirqs.CPU73.RCU
19880 ± 3% -24.9% 14925 ± 10% softirqs.CPU73.SCHED
46779 +86.3% 87128 ± 16% softirqs.CPU74.RCU
46511 +78.9% 83214 ± 8% softirqs.CPU75.RCU
20115 ± 2% -16.8% 16738 ± 13% softirqs.CPU75.SCHED
47172 +86.1% 87766 ± 23% softirqs.CPU76.RCU
46545 +86.2% 86675 ± 21% softirqs.CPU77.RCU
46218 +83.2% 84657 ± 18% softirqs.CPU78.RCU
46684 +89.3% 88396 ± 13% softirqs.CPU80.RCU
19891 ± 2% -21.4% 15643 ± 22% softirqs.CPU80.SCHED
46495 +78.7% 83109 ± 16% softirqs.CPU82.RCU
46681 +47.0% 68611 ± 16% softirqs.CPU83.RCU
46863 +113.7% 100165 ± 21% softirqs.CPU84.RCU
46501 +82.7% 84976 ± 19% softirqs.CPU85.RCU
46438 +110.3% 97663 ± 20% softirqs.CPU86.RCU
20145 ± 2% -34.2% 13260 ± 45% softirqs.CPU86.SCHED
46477 +72.9% 80344 ± 7% softirqs.CPU88.RCU
46584 +88.7% 87890 ± 30% softirqs.CPU89.RCU
47011 +91.4% 89967 ± 27% softirqs.CPU90.RCU
46533 +90.2% 88483 ± 16% softirqs.CPU91.RCU
19809 ± 2% -22.4% 15380 ± 24% softirqs.CPU91.SCHED
46519 +80.5% 83983 ± 25% softirqs.CPU92.RCU
20033 ± 4% +15.8% 23197 ± 9% softirqs.CPU95.SCHED
46981 +141.8% 113612 ± 6% softirqs.CPU96.RCU
19901 ± 3% -60.5% 7864 ± 26% softirqs.CPU96.SCHED
46311 +52.8% 70742 ± 17% softirqs.CPU98.RCU
8941907 +45.2% 12983886 softirqs.RCU
74812 -12.4% 65558 softirqs.TIMER
9395 ±119% -93.2% 635.00 ± 86% interrupts.34:PCI-MSI.524292-edge.eth0-TxRx-3
6611 ± 9% -45.9% 3576 ± 34% interrupts.CPU0.NMI:Non-maskable_interrupts
6611 ± 9% -45.9% 3576 ± 34% interrupts.CPU0.PMI:Performance_monitoring_interrupts
512.25 ± 3% -72.8% 139.25 ± 34% interrupts.CPU0.RES:Rescheduling_interrupts
241.75 ± 14% -54.1% 111.00 ± 32% interrupts.CPU1.RES:Rescheduling_interrupts
228.50 ± 10% -58.8% 94.25 ± 25% interrupts.CPU10.RES:Rescheduling_interrupts
6502 ± 10% -43.7% 3660 ± 37% interrupts.CPU101.NMI:Non-maskable_interrupts
6502 ± 10% -43.7% 3660 ± 37% interrupts.CPU101.PMI:Performance_monitoring_interrupts
213.00 ± 3% -45.8% 115.50 ± 36% interrupts.CPU101.RES:Rescheduling_interrupts
230.75 ± 9% -46.9% 122.50 ± 29% interrupts.CPU104.RES:Rescheduling_interrupts
215.25 ± 4% -46.1% 116.00 ± 21% interrupts.CPU105.RES:Rescheduling_interrupts
210.00 ± 3% -36.3% 133.75 ± 18% interrupts.CPU106.RES:Rescheduling_interrupts
230.50 ± 15% -32.6% 155.25 ± 26% interrupts.CPU107.RES:Rescheduling_interrupts
214.25 -45.3% 117.25 ± 21% interrupts.CPU108.RES:Rescheduling_interrupts
908.25 ± 10% -11.5% 803.50 interrupts.CPU109.CAL:Function_call_interrupts
254.50 ± 17% -50.4% 126.25 ± 11% interrupts.CPU109.RES:Rescheduling_interrupts
212.50 ± 2% -62.4% 80.00 ± 38% interrupts.CPU11.RES:Rescheduling_interrupts
228.00 ± 14% -37.5% 142.50 ± 21% interrupts.CPU110.RES:Rescheduling_interrupts
210.75 ± 2% -44.8% 116.25 ± 17% interrupts.CPU112.RES:Rescheduling_interrupts
206.00 ± 2% -43.4% 116.50 ± 37% interrupts.CPU113.RES:Rescheduling_interrupts
6562 ± 11% -39.8% 3949 ± 30% interrupts.CPU114.NMI:Non-maskable_interrupts
6562 ± 11% -39.8% 3949 ± 30% interrupts.CPU114.PMI:Performance_monitoring_interrupts
214.00 ± 2% -43.1% 121.75 ± 23% interrupts.CPU114.RES:Rescheduling_interrupts
215.75 ± 3% -55.9% 95.25 ± 35% interrupts.CPU115.RES:Rescheduling_interrupts
6590 ± 9% -39.6% 3977 ± 30% interrupts.CPU116.NMI:Non-maskable_interrupts
6590 ± 9% -39.6% 3977 ± 30% interrupts.CPU116.PMI:Performance_monitoring_interrupts
219.00 -58.1% 91.75 ± 33% interrupts.CPU116.RES:Rescheduling_interrupts
6427 ± 11% -46.0% 3468 ± 37% interrupts.CPU117.NMI:Non-maskable_interrupts
6427 ± 11% -46.0% 3468 ± 37% interrupts.CPU117.PMI:Performance_monitoring_interrupts
239.50 ± 17% -54.0% 110.25 ± 26% interrupts.CPU117.RES:Rescheduling_interrupts
222.50 ± 11% -58.2% 93.00 ± 20% interrupts.CPU118.RES:Rescheduling_interrupts
236.75 ± 14% -61.1% 92.00 ± 26% interrupts.CPU119.RES:Rescheduling_interrupts
223.50 ± 12% -44.1% 125.00 ± 37% interrupts.CPU120.RES:Rescheduling_interrupts
215.75 ± 2% -59.7% 87.00 ± 46% interrupts.CPU121.RES:Rescheduling_interrupts
233.25 ± 9% -50.7% 115.00 ± 54% interrupts.CPU123.RES:Rescheduling_interrupts
232.50 ± 12% -67.4% 75.75 ± 49% interrupts.CPU124.RES:Rescheduling_interrupts
226.00 ± 5% -59.2% 92.25 ± 13% interrupts.CPU126.RES:Rescheduling_interrupts
9395 ±119% -93.2% 635.00 ± 86% interrupts.CPU13.34:PCI-MSI.524292-edge.eth0-TxRx-3
209.00 -52.3% 99.75 ± 30% interrupts.CPU13.RES:Rescheduling_interrupts
5697 ± 22% +45.1% 8264 ± 9% interrupts.CPU130.NMI:Non-maskable_interrupts
5697 ± 22% +45.1% 8264 ± 9% interrupts.CPU130.PMI:Performance_monitoring_interrupts
224.25 ± 2% -49.2% 114.00 ± 55% interrupts.CPU130.RES:Rescheduling_interrupts
218.25 ± 14% -54.1% 100.25 ± 56% interrupts.CPU131.RES:Rescheduling_interrupts
206.50 ± 3% -60.3% 82.00 ± 35% interrupts.CPU132.RES:Rescheduling_interrupts
5766 ± 22% +40.6% 8109 ± 9% interrupts.CPU133.NMI:Non-maskable_interrupts
5766 ± 22% +40.6% 8109 ± 9% interrupts.CPU133.PMI:Performance_monitoring_interrupts
223.75 ± 15% -60.1% 89.25 ± 10% interrupts.CPU133.RES:Rescheduling_interrupts
214.25 ± 4% -44.8% 118.25 ± 19% interrupts.CPU134.RES:Rescheduling_interrupts
206.75 ± 2% -40.6% 122.75 ± 32% interrupts.CPU136.RES:Rescheduling_interrupts
231.00 ± 16% -43.3% 131.00 ± 35% interrupts.CPU138.RES:Rescheduling_interrupts
221.00 ± 10% -35.4% 142.75 ± 14% interrupts.CPU139.RES:Rescheduling_interrupts
210.50 -39.1% 128.25 ± 24% interrupts.CPU140.RES:Rescheduling_interrupts
205.50 ± 5% -48.7% 105.50 ± 14% interrupts.CPU141.RES:Rescheduling_interrupts
6581 ± 10% +27.4% 8384 ± 7% interrupts.CPU142.NMI:Non-maskable_interrupts
6581 ± 10% +27.4% 8384 ± 7% interrupts.CPU142.PMI:Performance_monitoring_interrupts
202.00 ± 4% -35.6% 130.00 ± 33% interrupts.CPU142.RES:Rescheduling_interrupts
215.25 ± 12% -50.6% 106.25 ± 39% interrupts.CPU143.RES:Rescheduling_interrupts
214.00 ± 3% -45.2% 117.25 ± 15% interrupts.CPU144.RES:Rescheduling_interrupts
218.50 ± 11% -57.6% 92.75 ± 41% interrupts.CPU145.RES:Rescheduling_interrupts
6495 ± 11% +34.4% 8729 interrupts.CPU146.NMI:Non-maskable_interrupts
6495 ± 11% +34.4% 8729 interrupts.CPU146.PMI:Performance_monitoring_interrupts
206.00 ± 5% -53.6% 95.50 ± 31% interrupts.CPU146.RES:Rescheduling_interrupts
875.75 ± 6% -7.3% 811.50 interrupts.CPU147.CAL:Function_call_interrupts
222.50 ± 13% -64.3% 79.50 ± 29% interrupts.CPU147.RES:Rescheduling_interrupts
206.00 ± 5% -67.5% 67.00 ± 52% interrupts.CPU148.RES:Rescheduling_interrupts
226.75 ± 13% -67.8% 73.00 ± 29% interrupts.CPU149.RES:Rescheduling_interrupts
225.25 ± 13% -65.9% 76.75 ± 9% interrupts.CPU150.RES:Rescheduling_interrupts
215.75 ± 12% -67.0% 71.25 ± 37% interrupts.CPU151.RES:Rescheduling_interrupts
208.50 ± 3% -53.1% 97.75 ± 57% interrupts.CPU152.RES:Rescheduling_interrupts
199.00 ± 4% -63.9% 71.75 ± 48% interrupts.CPU153.RES:Rescheduling_interrupts
230.25 ± 9% -59.5% 93.25 ± 27% interrupts.CPU154.RES:Rescheduling_interrupts
159.00 ±164% -99.1% 1.50 ± 33% interrupts.CPU154.TLB:TLB_shootdowns
208.50 ± 14% -59.2% 85.00 ± 33% interrupts.CPU155.RES:Rescheduling_interrupts
6628 ± 9% -51.5% 3214 ± 48% interrupts.CPU156.NMI:Non-maskable_interrupts
6628 ± 9% -51.5% 3214 ± 48% interrupts.CPU156.PMI:Performance_monitoring_interrupts
213.50 ± 23% -65.0% 74.75 ± 36% interrupts.CPU156.RES:Rescheduling_interrupts
192.75 ± 5% -59.1% 78.75 ± 31% interrupts.CPU157.RES:Rescheduling_interrupts
195.50 ± 13% -68.3% 62.00 ± 29% interrupts.CPU158.RES:Rescheduling_interrupts
187.50 ± 12% -60.4% 74.25 ± 54% interrupts.CPU159.RES:Rescheduling_interrupts
214.50 ± 2% -45.7% 116.50 ± 24% interrupts.CPU16.RES:Rescheduling_interrupts
168.25 ± 15% -60.2% 67.00 ± 15% interrupts.CPU160.RES:Rescheduling_interrupts
142.75 ± 11% -54.5% 65.00 ± 29% interrupts.CPU161.RES:Rescheduling_interrupts
160.00 ± 26% -67.2% 52.50 ± 49% interrupts.CPU162.RES:Rescheduling_interrupts
124.00 ± 9% -46.8% 66.00 ± 39% interrupts.CPU163.RES:Rescheduling_interrupts
121.75 ± 18% -66.1% 41.25 ± 41% interrupts.CPU164.RES:Rescheduling_interrupts
109.25 ± 21% -59.7% 44.00 ± 37% interrupts.CPU165.RES:Rescheduling_interrupts
103.75 ± 19% -66.3% 35.00 ± 19% interrupts.CPU166.RES:Rescheduling_interrupts
80.25 ± 23% -69.2% 24.75 ± 29% interrupts.CPU168.RES:Rescheduling_interrupts
226.50 ± 9% -48.6% 116.50 ± 37% interrupts.CPU17.RES:Rescheduling_interrupts
81.50 ± 21% -84.0% 13.00 ± 62% interrupts.CPU173.RES:Rescheduling_interrupts
81.50 ± 34% -83.7% 13.25 ± 34% interrupts.CPU174.RES:Rescheduling_interrupts
894.50 ± 5% -9.2% 812.50 interrupts.CPU179.CAL:Function_call_interrupts
96.75 ± 33% -88.6% 11.00 ± 28% interrupts.CPU179.RES:Rescheduling_interrupts
206.75 ± 5% -47.8% 108.00 ± 38% interrupts.CPU18.RES:Rescheduling_interrupts
78.25 ± 26% -87.9% 9.50 ± 71% interrupts.CPU185.RES:Rescheduling_interrupts
901.50 ± 4% -7.8% 831.25 ± 3% interrupts.CPU187.CAL:Function_call_interrupts
79.75 ± 38% -85.3% 11.75 ± 54% interrupts.CPU187.RES:Rescheduling_interrupts
212.25 ± 4% -38.5% 130.50 ± 34% interrupts.CPU19.RES:Rescheduling_interrupts
86.25 ± 34% -90.4% 8.25 ± 31% interrupts.CPU190.RES:Rescheduling_interrupts
1058 ± 19% -22.5% 820.25 ± 2% interrupts.CPU2.CAL:Function_call_interrupts
214.00 ± 3% -41.8% 124.50 ± 17% interrupts.CPU2.RES:Rescheduling_interrupts
206.50 ± 5% -32.4% 139.50 ± 27% interrupts.CPU20.RES:Rescheduling_interrupts
205.00 ± 2% -40.9% 121.25 ± 31% interrupts.CPU21.RES:Rescheduling_interrupts
237.75 ± 17% -44.6% 131.75 ± 23% interrupts.CPU22.RES:Rescheduling_interrupts
208.25 ± 5% -36.1% 133.00 ± 29% interrupts.CPU23.RES:Rescheduling_interrupts
209.00 ± 4% -50.8% 102.75 ± 45% interrupts.CPU24.RES:Rescheduling_interrupts
212.00 -28.4% 151.75 ± 16% interrupts.CPU25.RES:Rescheduling_interrupts
209.25 ± 4% -44.0% 117.25 ± 48% interrupts.CPU26.RES:Rescheduling_interrupts
6511 ± 11% -29.5% 4593 ± 27% interrupts.CPU27.NMI:Non-maskable_interrupts
6511 ± 11% -29.5% 4593 ± 27% interrupts.CPU27.PMI:Performance_monitoring_interrupts
221.50 ± 13% -52.6% 105.00 ± 59% interrupts.CPU27.RES:Rescheduling_interrupts
200.75 ± 3% -26.0% 148.50 ± 24% interrupts.CPU28.RES:Rescheduling_interrupts
224.25 ± 13% -51.8% 108.00 ± 65% interrupts.CPU29.RES:Rescheduling_interrupts
215.50 -53.4% 100.50 ± 57% interrupts.CPU3.RES:Rescheduling_interrupts
205.00 ± 2% -33.4% 136.50 ± 20% interrupts.CPU30.RES:Rescheduling_interrupts
219.00 ± 14% -41.8% 127.50 ± 26% interrupts.CPU31.RES:Rescheduling_interrupts
210.50 ± 5% -34.1% 138.75 ± 36% interrupts.CPU33.RES:Rescheduling_interrupts
6547 ± 11% -38.3% 4039 ± 30% interrupts.CPU34.NMI:Non-maskable_interrupts
6547 ± 11% -38.3% 4039 ± 30% interrupts.CPU34.PMI:Performance_monitoring_interrupts
214.75 ± 18% -51.9% 103.25 ± 62% interrupts.CPU34.RES:Rescheduling_interrupts
210.00 ± 2% -43.6% 118.50 ± 51% interrupts.CPU35.RES:Rescheduling_interrupts
214.75 ± 2% -25.8% 159.25 ± 8% interrupts.CPU36.RES:Rescheduling_interrupts
6451 ± 11% -44.1% 3608 ± 44% interrupts.CPU37.NMI:Non-maskable_interrupts
6451 ± 11% -44.1% 3608 ± 44% interrupts.CPU37.PMI:Performance_monitoring_interrupts
205.50 ± 4% -35.9% 131.75 ± 18% interrupts.CPU37.RES:Rescheduling_interrupts
165.50 ±162% -99.1% 1.50 ±110% interrupts.CPU38.TLB:TLB_shootdowns
228.00 ± 15% -48.4% 117.75 ± 27% interrupts.CPU41.RES:Rescheduling_interrupts
234.75 ± 6% -52.1% 112.50 ± 31% interrupts.CPU42.RES:Rescheduling_interrupts
205.25 ± 6% -66.4% 69.00 ± 24% interrupts.CPU43.RES:Rescheduling_interrupts
207.75 ± 3% -59.2% 84.75 ± 30% interrupts.CPU44.RES:Rescheduling_interrupts
227.25 ± 10% -53.4% 106.00 ± 4% interrupts.CPU45.RES:Rescheduling_interrupts
4912 ± 34% -45.5% 2678 ± 11% interrupts.CPU46.NMI:Non-maskable_interrupts
4912 ± 34% -45.5% 2678 ± 11% interrupts.CPU46.PMI:Performance_monitoring_interrupts
211.50 -56.7% 91.50 ± 24% interrupts.CPU46.RES:Rescheduling_interrupts
5753 ± 29% -43.6% 3242 ± 20% interrupts.CPU47.NMI:Non-maskable_interrupts
5753 ± 29% -43.6% 3242 ± 20% interrupts.CPU47.PMI:Performance_monitoring_interrupts
206.00 ± 2% -39.1% 125.50 ± 17% interrupts.CPU47.RES:Rescheduling_interrupts
6443 ± 12% -49.8% 3234 ± 20% interrupts.CPU48.NMI:Non-maskable_interrupts
6443 ± 12% -49.8% 3234 ± 20% interrupts.CPU48.PMI:Performance_monitoring_interrupts
226.25 ± 12% -57.5% 96.25 ± 15% interrupts.CPU48.RES:Rescheduling_interrupts
994.00 ± 16% -16.9% 826.25 ± 4% interrupts.CPU49.CAL:Function_call_interrupts
267.00 ± 21% -53.7% 123.75 ± 26% interrupts.CPU49.RES:Rescheduling_interrupts
847.25 ± 6% +16.4% 986.50 ± 12% interrupts.CPU5.CAL:Function_call_interrupts
226.00 ± 12% -47.1% 119.50 ± 36% interrupts.CPU5.RES:Rescheduling_interrupts
6641 ± 10% -62.3% 2505 ± 24% interrupts.CPU50.NMI:Non-maskable_interrupts
6641 ± 10% -62.3% 2505 ± 24% interrupts.CPU50.PMI:Performance_monitoring_interrupts
209.00 -41.6% 122.00 ± 18% interrupts.CPU50.RES:Rescheduling_interrupts
933.00 ± 7% -13.4% 807.75 interrupts.CPU51.CAL:Function_call_interrupts
6631 ± 9% -40.3% 3961 ± 16% interrupts.CPU51.NMI:Non-maskable_interrupts
6631 ± 9% -40.3% 3961 ± 16% interrupts.CPU51.PMI:Performance_monitoring_interrupts
273.25 ± 15% -52.0% 131.25 ± 18% interrupts.CPU51.RES:Rescheduling_interrupts
891.75 ± 3% -10.0% 803.00 interrupts.CPU52.CAL:Function_call_interrupts
6554 ± 11% -44.9% 3610 ± 20% interrupts.CPU52.NMI:Non-maskable_interrupts
6554 ± 11% -44.9% 3610 ± 20% interrupts.CPU52.PMI:Performance_monitoring_interrupts
237.25 ± 12% -37.1% 149.25 ± 27% interrupts.CPU52.RES:Rescheduling_interrupts
209.50 ± 7% -31.3% 144.00 ± 26% interrupts.CPU53.RES:Rescheduling_interrupts
205.00 ± 4% -30.7% 142.00 ± 13% interrupts.CPU54.RES:Rescheduling_interrupts
229.00 ± 11% -38.1% 141.75 ± 28% interrupts.CPU55.RES:Rescheduling_interrupts
218.25 ± 6% -49.1% 111.00 ± 56% interrupts.CPU56.RES:Rescheduling_interrupts
885.75 ± 6% -9.1% 805.50 interrupts.CPU57.CAL:Function_call_interrupts
244.00 ± 13% -41.5% 142.75 ± 37% interrupts.CPU57.RES:Rescheduling_interrupts
246.50 ± 24% -52.7% 116.50 ± 23% interrupts.CPU58.RES:Rescheduling_interrupts
880.50 ± 4% -8.5% 805.50 interrupts.CPU59.CAL:Function_call_interrupts
235.00 ± 14% -46.1% 126.75 ± 29% interrupts.CPU59.RES:Rescheduling_interrupts
901.75 ± 5% -10.8% 804.75 interrupts.CPU60.CAL:Function_call_interrupts
243.50 ± 9% -45.1% 133.75 ± 26% interrupts.CPU60.RES:Rescheduling_interrupts
871.50 ± 3% -6.1% 818.00 ± 3% interrupts.CPU61.CAL:Function_call_interrupts
244.25 ± 15% -51.1% 119.50 ± 24% interrupts.CPU61.RES:Rescheduling_interrupts
228.75 ± 12% -36.0% 146.50 ± 19% interrupts.CPU62.RES:Rescheduling_interrupts
223.25 ± 11% -41.7% 130.25 ± 44% interrupts.CPU63.RES:Rescheduling_interrupts
228.00 ± 13% -44.6% 126.25 ± 15% interrupts.CPU64.RES:Rescheduling_interrupts
222.75 ± 12% -39.4% 135.00 ± 23% interrupts.CPU65.RES:Rescheduling_interrupts
930.50 ± 7% -13.4% 806.25 interrupts.CPU66.CAL:Function_call_interrupts
256.75 ± 17% -41.3% 150.75 ± 35% interrupts.CPU66.RES:Rescheduling_interrupts
226.00 ± 13% -59.0% 92.75 ± 69% interrupts.CPU67.RES:Rescheduling_interrupts
204.50 ± 3% -43.9% 114.75 ± 42% interrupts.CPU69.RES:Rescheduling_interrupts
231.50 ± 11% -48.2% 120.00 ± 22% interrupts.CPU70.RES:Rescheduling_interrupts
889.25 ± 5% -9.2% 807.00 interrupts.CPU71.CAL:Function_call_interrupts
233.50 ± 9% -55.5% 104.00 ± 45% interrupts.CPU71.RES:Rescheduling_interrupts
222.50 ± 2% -41.7% 129.75 ± 22% interrupts.CPU72.RES:Rescheduling_interrupts
214.50 ± 4% -27.9% 154.75 ± 20% interrupts.CPU73.RES:Rescheduling_interrupts
219.25 ± 3% -34.1% 144.50 ± 5% interrupts.CPU74.RES:Rescheduling_interrupts
214.00 -28.6% 152.75 ± 12% interrupts.CPU75.RES:Rescheduling_interrupts
225.50 ± 14% -35.6% 145.25 ± 21% interrupts.CPU76.RES:Rescheduling_interrupts
5705 ± 23% +47.1% 8393 ± 6% interrupts.CPU77.NMI:Non-maskable_interrupts
5705 ± 23% +47.1% 8393 ± 6% interrupts.CPU77.PMI:Performance_monitoring_interrupts
918.50 ± 5% -12.2% 806.75 interrupts.CPU79.CAL:Function_call_interrupts
243.50 ± 8% -62.3% 91.75 ± 63% interrupts.CPU79.RES:Rescheduling_interrupts
229.00 ± 12% -54.3% 104.75 ± 38% interrupts.CPU8.RES:Rescheduling_interrupts
953.75 ± 9% -12.2% 837.25 ± 4% interrupts.CPU80.CAL:Function_call_interrupts
208.00 -40.6% 123.50 ± 20% interrupts.CPU82.RES:Rescheduling_interrupts
927.25 ± 12% -9.2% 842.00 ± 8% interrupts.CPU83.CAL:Function_call_interrupts
231.75 ± 14% -47.7% 121.25 ± 27% interrupts.CPU83.RES:Rescheduling_interrupts
6546 ± 10% +31.5% 8607 ± 2% interrupts.CPU84.NMI:Non-maskable_interrupts
6546 ± 10% +31.5% 8607 ± 2% interrupts.CPU84.PMI:Performance_monitoring_interrupts
230.50 ± 13% -29.8% 161.75 ± 21% interrupts.CPU84.RES:Rescheduling_interrupts
224.00 ± 13% -49.6% 113.00 ± 49% interrupts.CPU87.RES:Rescheduling_interrupts
242.50 ± 20% -52.1% 116.25 ± 29% interrupts.CPU88.RES:Rescheduling_interrupts
5676 ± 23% +43.6% 8148 ± 12% interrupts.CPU89.NMI:Non-maskable_interrupts
5676 ± 23% +43.6% 8148 ± 12% interrupts.CPU89.PMI:Performance_monitoring_interrupts
210.00 ± 4% -46.5% 112.25 ± 30% interrupts.CPU9.RES:Rescheduling_interrupts
5725 ± 23% +51.7% 8683 interrupts.CPU90.NMI:Non-maskable_interrupts
5725 ± 23% +51.7% 8683 interrupts.CPU90.PMI:Performance_monitoring_interrupts
259.75 ± 23% -42.9% 148.25 ± 37% interrupts.CPU90.RES:Rescheduling_interrupts
211.75 ± 3% -35.1% 137.50 ± 33% interrupts.CPU91.RES:Rescheduling_interrupts
210.50 ± 3% -44.4% 117.00 ± 28% interrupts.CPU92.RES:Rescheduling_interrupts
228.25 ± 9% -61.1% 88.75 ± 34% interrupts.CPU93.RES:Rescheduling_interrupts
218.25 ± 4% -52.5% 103.75 ± 38% interrupts.CPU94.RES:Rescheduling_interrupts
216.50 ± 2% -80.7% 41.75 ± 73% interrupts.CPU95.RES:Rescheduling_interrupts
6560 ± 11% +33.0% 8725 interrupts.CPU96.NMI:Non-maskable_interrupts
6560 ± 11% +33.0% 8725 interrupts.CPU96.PMI:Performance_monitoring_interrupts
217.25 ± 3% -41.2% 127.75 ± 23% interrupts.CPU97.RES:Rescheduling_interrupts
215.75 ± 2% -48.9% 110.25 ± 29% interrupts.CPU98.RES:Rescheduling_interrupts
38197 -45.7% 20733 ± 6% interrupts.RES:Rescheduling_interrupts



will-it-scale.per_process_ops

7200 +--------------------------------------------------------------------+
| |
7000 |-+ +.. +.+ |
| : .+..+..+ + : + +. |
6800 |-+ : +.. + : + : + + .. +..|
| .+ .. : .+ : .+.. .+ + .+..+ |
6600 |.+ + + +. + +. +..+ |
| + .. |
6400 |-+ + O O O |
| O O O |
6200 |-+O O O |
| O O O O O O O |
6000 |-+ O |
| O |
5800 +--------------------------------------------------------------------+


[*] bisect-good sample
[O] bisect-bad sample



Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Rong Chen


Attachments:
config-5.9.0-rc5-00009-g56c21abbe67ac (172.80 kB)
job-script (7.79 kB)
job.yaml (5.21 kB)
reproduce (349.00 B)

2020-09-28 09:22:44

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Tue, Sep 15, 2020 at 07:56:09PM +0800, [email protected] wrote:
> From: jun qian <[email protected]>
>
> Allow terminating the softirq processing loop without finishing the vectors.
>
> Signed-off-by: jun qian <[email protected]>
> ---
> kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 91 insertions(+), 22 deletions(-)

This is still a ginormous patch for something that should be simple.

I've put my patches (and a bunch on top) in:

git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq

but got a fairly significant regression report from 0day on that, and
haven't had time/motivation to look at that.


2020-09-28 09:26:26

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Tue, Sep 15, 2020 at 07:56:09PM +0800, [email protected] wrote:
> +/*
> + * The pending_next_bit is recorded for the next processing order when
> + * the loop is broken. This per cpu variable is to solve the following
> + * scenarios:

This is what adds all that complexity, and I think it's wrong. The
softirqs are priority ordered. Running them again from 0 up if/when you
break seems 'right'.

2020-09-28 10:12:59

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Mon, Sep 28 2020 at 11:22, Peter Zijlstra wrote:

> On Tue, Sep 15, 2020 at 07:56:09PM +0800, [email protected] wrote:
>> +/*
>> + * The pending_next_bit is recorded for the next processing order when
>> + * the loop is broken. This per cpu variable is to solve the following
>> + * scenarios:
>
> This is what adds all that complexity, and I think it's wrong. The
> softirqs are priority ordered. Running them again from 0 up if/when you
> break seems 'right'.

No. If you break the loop and then restart from 0, you can starve the
higher numbered ones if the next loop terminates early because one of
the lower ones takes too long. Made that happen with networking :)

See the variant I proposed :)

Thanks,

tglx
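
To make the starvation argument concrete, here is a minimal user-space model
in C. The per-vector costs, the 2ms budget and the re-raise pattern are
invented numbers; it models only the choice of starting point, not either
actual patch. With these made-up numbers, "restart from 0" never reaches
vector 9, while resuming from the break point reaches it on the second and
fourth passes.

/*
 * Toy model of the starvation above. Vectors 0 and 1 are expensive and get
 * re-raised before every pass, vector 9 is cheaper but sits at the end.
 * With a "restart from 0" policy the budget is spent on the low vectors
 * every time and vector 9 never runs; resuming from the vector after the
 * break point lets it run on the next pass.
 */
#include <stdio.h>

#define NR_VECS   10
#define BUDGET_US 2000

static const int cost_us[NR_VECS] = {
	1200, 900, 50, 50, 50, 50, 50, 50, 50, 800
};

/* Handle pending vectors from @start up; return the bit after the break. */
static unsigned int run_pass(unsigned int *pending, int start)
{
	int spent = 0, vec;

	for (vec = start; vec < NR_VECS; vec++) {
		if (!(*pending & (1U << vec)))
			continue;
		*pending &= ~(1U << vec);
		spent += cost_us[vec];
		if (spent >= BUDGET_US)
			return vec + 1;
	}
	return 0;
}

int main(void)
{
	int policy;

	for (policy = 0; policy < 2; policy++) {
		unsigned int ran9 = 0, next = 0;
		int pass;

		for (pass = 0; pass < 5; pass++) {
			/* vectors 0, 1 and 9 are raised (again) before every pass */
			unsigned int pending = 1U << 0 | 1U << 1 | 1U << 9;

			next = run_pass(&pending, policy ? next : 0);
			/* vector 9 ran iff it is not left over after the pass */
			if (!(pending & (1U << 9)))
				ran9++;
		}
		printf("%-17s: vector 9 ran in %u of 5 passes\n",
		       policy ? "resume from break" : "restart from 0", ran9);
	}
	return 0;
}
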

2020-09-28 10:53:22

by jun qian

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

Frederic Weisbecker <[email protected]> wrote on Fri, Sep 25, 2020 at 8:42 AM:
>
> On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> > Subject: softirq; Prevent starvation of higher softirq vectors
> [...]
> > + /*
> > + * Word swap pending to move the not yet handled bits of the previous
> > + * run first and then clear the duplicates in the newly raised ones.
> > + */
> > + swahw32s(&cur_pending);
> > + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);
> > +
> > for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> > int prev_count;
> >
> > + vec_nr &= SIRQ_VECTOR_MASK;
>
> Shouldn't NR_SOFTIRQS above protect from that?
>
> > __clear_bit(vec_nr, &pending);
> > kstat_incr_softirqs_this_cpu(vec_nr);
> >
> [...]
> > + } else {
> > + /*
> > + * Retain the unprocessed bits and swap @cur_pending back
> > + * into normal ordering
> > + */
> > + cur_pending = (u32)pending;
> > + swahw32s(&cur_pending);
> > + /*
> > + * If the previous bits are done move the low word of
> > + * @pending into the high word so it's processed first.
> > + */
> > + if (!(cur_pending & SIRQ_PREV_MASK))
> > + cur_pending <<= SIRQ_PREV_SHIFT;
>
> If the previous bits are done and there is no timeout, should
> we consider restarting the loop?
>
> A common case would be to enter do_softirq() with RCU_SOFTIRQ set
> in the SIRQ_PREV_MASK and NET_RX_SOFTIRQ set in the normal mask.
>
> You would always end up processing the RCU_SOFTIRQ here and trigger
> ksoftirqd for the NET_RX_SOFTIRQ.

Yes, I found that this problem also exists in our project. The RCU
softirq may cost 9ms, which delays the processing of the net_rx/tx
softirqs. Peter's branch may solve the problem:
git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq

>
> Although that's probably no big deal, as we should already be in ksoftirqd
> if we processed prev bits. We are just going to iterate the kthread loop
> instead of the do_softirq loop. Probably no real issue then...
>
>
> >
> > + /* Merge the newly pending ones into the low word */
> > + cur_pending |= new_pending;
> > + }
> > + set_softirq_pending(cur_pending);
> > wakeup_softirqd();
> > out:
> > lockdep_softirq_end(in_hardirq);
>
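
A minimal user-space sketch of the halfword-swap ordering in the patch quoted
above. The constants are assumptions, not taken from the posted patch:
SIRQ_PREV_SHIFT is taken to be 16, so leftovers of a broken run live in the
high halfword of the pending word, and SIRQ_VECTOR_MASK is taken to be 0xf.
The kernel helper swahw32s() swaps the two 16-bit halfwords in place; it is
open-coded here so the example builds in user space.

/*
 * Illustrates: leftovers of the previous (broken) run are processed first,
 * and a re-raised duplicate of a leftover vector is dropped from the newly
 * raised half, exactly as the quoted comment describes.
 */
#include <stdio.h>
#include <stdint.h>

#define SIRQ_PREV_SHIFT  16
#define SIRQ_VECTOR_MASK 0xfu

static uint32_t swahw32(uint32_t x)
{
	return (x << 16) | (x >> 16);	/* swap the 16-bit halfwords */
}

int main(void)
{
	/* Frederic's common case: RCU (vec 9) was left over from the broken
	 * run, NET_RX (vec 3) was raised since, and RCU was raised again. */
	uint32_t prev = 1u << 9;
	uint32_t newly_raised = (1u << 3) | (1u << 9);

	/* What the previous run stored back: leftovers in the high halfword. */
	uint32_t cur_pending = (prev << SIRQ_PREV_SHIFT) | newly_raised;

	/* Swap so the leftovers come first, then drop their duplicates from
	 * the newly raised (now high) halfword. */
	cur_pending = swahw32(cur_pending);
	uint32_t pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);

	printf("iteration order:");
	for (int bit = 0; bit < 32; bit++) {
		if (!(pending & (1u << bit)))
			continue;
		/* map a high-halfword bit back to its real vector number */
		printf(" vec %u (%s)", bit & SIRQ_VECTOR_MASK,
		       bit < SIRQ_PREV_SHIFT ? "prev" : "new");
	}
	printf("\n");	/* -> vec 9 (prev) vec 3 (new): RCU first, dup dropped */
	return 0;
}
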

2020-09-28 11:17:24

by jun qian

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

Peter Zijlstra <[email protected]> wrote on Mon, Sep 28, 2020 at 5:20 PM:
>
> On Tue, Sep 15, 2020 at 07:56:09PM +0800, [email protected] wrote:
> > From: jun qian <[email protected]>
> >
> > Allow terminating the softirq processing loop without finishing the vectors.
> >
> > Signed-off-by: jun qian <[email protected]>
> > ---
> > kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 91 insertions(+), 22 deletions(-)
>
> This is still a ginormous patch for something that should be simple.
>

Yes, I think so. Because the leftover pending bits need to be processed,
the code is not going to be simple. I will try my best to simplify it.

> I've put my patches (and a bunch on top) in:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq
>
> but got a fairly significant regression report from 0day on that, and
> haven't had time/motivation to look at that.
>

OK, I am trying to solve the significant regression problem; it may
take some time.

thanks

>

2020-09-29 11:57:21

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Mon, Sep 28, 2020 at 06:51:48PM +0800, jun qian wrote:
> Frederic Weisbecker <[email protected]> wrote on Fri, Sep 25, 2020 at 8:42 AM:
> >
> > On Thu, Sep 24, 2020 at 05:37:42PM +0200, Thomas Gleixner wrote:
> > > Subject: softirq; Prevent starvation of higher softirq vectors
> > [...]
> > > + /*
> > > + * Word swap pending to move the not yet handled bits of the previous
> > > + * run first and then clear the duplicates in the newly raised ones.
> > > + */
> > > + swahw32s(&cur_pending);
> > > + pending = cur_pending & ~(cur_pending << SIRQ_PREV_SHIFT);
> > > +
> > > for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
> > > int prev_count;
> > >
> > > + vec_nr &= SIRQ_VECTOR_MASK;
> >
> > Shouldn't NR_SOFTIRQS above protect from that?
> >
> > > __clear_bit(vec_nr, &pending);
> > > kstat_incr_softirqs_this_cpu(vec_nr);
> > >
> > [...]
> > > + } else {
> > > + /*
> > > + * Retain the unprocessed bits and swap @cur_pending back
> > > + * into normal ordering
> > > + */
> > > + cur_pending = (u32)pending;
> > > + swahw32s(&cur_pending);
> > > + /*
> > > + * If the previous bits are done move the low word of
> > > + * @pending into the high word so it's processed first.
> > > + */
> > > + if (!(cur_pending & SIRQ_PREV_MASK))
> > > + cur_pending <<= SIRQ_PREV_SHIFT;
> >
> > If the previous bits are done and there is no timeout, should
> > we consider restarting the loop?
> >
> > A common case would be to enter do_softirq() with RCU_SOFTIRQ set
> > in the SIRQ_PREV_MASK and NET_RX_SOFTIRQ set in the normal mask.
> >
> > You would always end up processing the RCU_SOFTIRQ here and trigger
> > ksoftirqd for the NET_RX_SOFTIRQ.
>
> Yes, I found that this problem also exists in our project. The RCU
> softirq may cost 9ms,

Ouch!

> which delays the processing of the net_rx/tx softirqs. Peter's branch
> may solve the problem:
> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq

It's probably also the right time for me to resume work on this patchset:

https://lwn.net/Articles/779564/

In the long term this will allow us to have per vector threads that can be
individually triggered upon high loads, and even soft interruptible by
other vectors from irq_exit(). Also if several vectors are on high loads
at the same time, this leaves the balance decisions to the scheduler instead
of all these workarounds we have been scratching our heads over for years now.

Besides, I'm convinced that splitting the softirqs is something we want in
the long run anyway.

2020-10-09 15:07:11

by Qais Yousef

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On 09/29/20 13:44, Frederic Weisbecker wrote:
> > which delays the processing of the net_rx/tx softirqs. Peter's branch
> > may solve the problem:
> > git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq
>
> It's probably also the right time for me to resume work on this patchset:
>
> https://lwn.net/Articles/779564/
>
> In the long term this will allow us to have per vector threads that can be
> individually triggered upon high loads, and even soft interruptible by
> other vectors from irq_exit(). Also if several vectors are on high loads
> at the same time, this leaves the balance decisions to the scheduler instead
> of all these workarounds we have been scratching our heads over for years now.
>
> Besides, I'm convinced that splitting the softirqs is something we want in
> the long run anyway.

So if I understood correctly we'll end up with a kthread for each softirq type
that can be scheduled individually on any CPU following the 'normal' scheduler
rules, correct?

If I got it right, I like that. I certainly think having these softirqs as RT
threads (like irq threads) makes a lot more sense. At least one would be able
to use priorities to reason about when it's okay to preempt them or not.

If I got it wrong, why can't we do that?

Thanks

--
Qais Yousef

2020-10-13 12:05:51

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On Fri, Oct 09, 2020 at 04:01:39PM +0100, Qais Yousef wrote:
> On 09/29/20 13:44, Frederic Weisbecker wrote:
> > > which delays the processing of the net_rx/tx softirqs. Peter's branch
> > > may solve the problem:
> > > git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq
> >
> > It's probably also the right time for me to resume work on this patchset:
> >
> > https://lwn.net/Articles/779564/
> >
> > In the long term this will allow us to have per vector threads that can be
> > individually triggered upon high loads, and even soft interruptible by
> > other vectors from irq_exit(). Also if several vectors are on high loads
> > at the same time, this leaves the balance decisions to the scheduler instead
> > of all these workarounds we have been scratching our heads over for years now.
> >
> > Besides, I'm convinced that splitting the softirqs is something we want in
> > the long run anyway.
>
> So if I understood correctly we'll end up with a kthread for each softirq type
> that can be scheduled individually on any CPU following the 'normal' scheduler
> rules, correct?
>
> If I got it right, I like that. I certainly think having these softirqs as RT
> threads (like irq threads) makes a lot more sense. At least one would be able
> to use priorities to reason about when it's okay to preempt them or not.
>
> If I got it wrong, why can't we do that?

We can't do that right away because some softirq vectors may rely on the
fact that they can't be interrupted by other softirq vectors. If they use
per-cpu data, they can perfectly assume that it's locally softirq-safe
and not use any lock to protect it, provided the data is strictly per-cpu,
of course.

So we'll need to check all the softirq handlers and make sure they don't
make such an assumption, or fix the offending sites. I can imagine it as an
iterative pushdown, just like we did with the big kernel lock.

Thanks.
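
As an illustration of the kind of silent assumption described above, here is
a hypothetical handler sketch; the struct, names and numbers are invented and
not taken from any real vector. Today it is correct without any lock because
softirq vectors neither nest nor run concurrently on the same CPU.

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/interrupt.h>

struct demo_stats {
	u64	frames;
	u64	bytes;
};

static DEFINE_PER_CPU(struct demo_stats, demo_stats);

/* Hypothetical handler, registered with open_softirq() in this sketch. */
static void demo_rx_action(struct softirq_action *h)
{
	struct demo_stats *s = this_cpu_ptr(&demo_stats);

	/*
	 * No lock and no atomics: only softirq context touches this data,
	 * softirqs don't nest, and two vectors never run at the same time
	 * on one CPU, so nothing can interleave with these updates here.
	 */
	s->frames += 1;
	s->bytes  += 1500;
}

If the vectors became independently scheduled threads, demo_rx_action() could
be preempted mid-update by another softirq thread touching the same per-CPU
data, so the plain increments would then need protection (a local_lock, for
instance).
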

2020-10-13 16:28:13

by Qais Yousef

[permalink] [raw]
Subject: Re: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop

On 10/13/20 12:43, Frederic Weisbecker wrote:
> On Fri, Oct 09, 2020 at 04:01:39PM +0100, Qais Yousef wrote:
> > On 09/29/20 13:44, Frederic Weisbecker wrote:
> > > > which delays the processing of the net_rx/tx softirqs. Peter's branch
> > > > may solve the problem:
> > > > git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git core/softirq
> > >
> > > It's probably also the right time for me to resume work on this patchset:
> > >
> > > https://lwn.net/Articles/779564/
> > >
> > > In the long term this will allow us to have per vector threads that can be
> > > individually triggered upon high loads, and even soft interruptible by
> > > other vectors from irq_exit(). Also if several vectors are on high loads
> > > at the same time, this leaves the balance decisions to the scheduler instead
> > > of all these workarounds we have been scratching our heads over for years now.
> > >
> > > Besides, I'm convinced that splitting the softirqs is something we want in
> > > the long run anyway.
> >
> > So if I understood correctly we'll end up with a kthread for each softirq type
> > that can be scheduled individually on any CPU following the 'normal' scheduler
> > rules, correct?
> >
> > If I got it right, I like that. I certainly think having these softirqs as RT
> > threads (like irq threads) makes a lot more sense. At least one would be able
> > to use priorities to reason about when it's okay to preempt them or not.
> >
> > If I got it wrong, why can't we do that?
>
> We can't do that right away because some softirq vectors may rely on the
> fact that they can't be interrupted by other softirq vectors. If they use
> per-cpu data, they can perfectly assume that it's locally softirq-safe
> and not use any lock to protect it, provided the data is strictly per-cpu,
> of course.
>
> So we'll need to check all the softirq handlers and make sure they don't
> make such an assumption, or fix the offending sites. I can imagine it as an
> iterative pushdown, just like we did with the big kernel lock.

Thanks Frederic. I know what to do in my free cycles now ;-)

FWIW, NAPI seems to have gained the ability to use kthreads (in case you
missed it):

https://lwn.net/Articles/833840/
https://lwn.net/ml/netdev/[email protected]/

Thanks

--
Qais Yousef