2020-09-01 14:45:08

by Marc Zyngier

Subject: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.

set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.

This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.

On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.

One of the major differences is that, in some cases (such as
when performing IRQ time accounting on the scheduler IPI), we
end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequence (these pairs are designed to nest correctly,
and the accounting shouldn't be off).

Reviewed-by: Valentin Schneider <[email protected]>
Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/smp.h | 5 ++
arch/arm64/kernel/smp.c | 93 +++++++++++++++++++++++++++++++-----
3 files changed, 87 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbee..d0fdbe5fb32f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -106,6 +106,7 @@ config ARM64
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IRQ_IPI
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0eadbf933e35..57c5db15f6b7 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -78,6 +78,11 @@ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));

extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);

+/*
+ * Register IPI interrupts with the arch SMP code
+ */
+extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
*/
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 355ee9eed4dd..00c9db1b61b5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -75,6 +75,13 @@ enum ipi_msg_type {
IPI_WAKEUP
};

+static int ipi_irq_base __read_mostly;
+static int nr_ipi __read_mostly = NR_IPI;
+static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+
+static void ipi_setup(int cpu);
+static void ipi_teardown(int cpu);
+
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_kill(unsigned int cpu);
#else
@@ -237,6 +244,8 @@ asmlinkage notrace void secondary_start_kernel(void)
*/
notify_cpu_starting(cpu);

+ ipi_setup(cpu);
+
store_cpu_topology(cpu);
numa_add_cpu(cpu);

@@ -302,6 +311,7 @@ int __cpu_disable(void)
* and we must not schedule until we're ready to give up the cpu.
*/
set_cpu_online(cpu, false);
+ ipi_teardown(cpu);

/*
* OK - migrate IRQs away from this CPU
@@ -890,10 +900,9 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
/*
* Main handler for inter-processor interrupts
*/
-void handle_IPI(int ipinr, struct pt_regs *regs)
+static void do_handle_IPI(int ipinr)
{
unsigned int cpu = smp_processor_id();
- struct pt_regs *old_regs = set_irq_regs(regs);

if ((unsigned)ipinr < NR_IPI) {
trace_ipi_entry_rcuidle(ipi_types[ipinr]);
@@ -906,21 +915,16 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;

case IPI_CALL_FUNC:
- irq_enter();
generic_smp_call_function_interrupt();
- irq_exit();
break;

case IPI_CPU_STOP:
- irq_enter();
local_cpu_stop();
- irq_exit();
break;

case IPI_CPU_CRASH_STOP:
if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
- irq_enter();
- ipi_cpu_crash_stop(cpu, regs);
+ ipi_cpu_crash_stop(cpu, get_irq_regs());

unreachable();
}
@@ -928,17 +932,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs)

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
- irq_enter();
tick_receive_broadcast();
- irq_exit();
break;
#endif

#ifdef CONFIG_IRQ_WORK
case IPI_IRQ_WORK:
- irq_enter();
irq_work_run();
- irq_exit();
break;
#endif

@@ -957,9 +957,78 @@ void handle_IPI(int ipinr, struct pt_regs *regs)

if ((unsigned)ipinr < NR_IPI)
trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+}
+
+/* Legacy version, should go away once all irqchips have been converted */
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ irq_enter();
+ do_handle_IPI(ipinr);
+ irq_exit();
+
set_irq_regs(old_regs);
}

+static irqreturn_t ipi_handler(int irq, void *data)
+{
+ do_handle_IPI(irq - ipi_irq_base);
+ return IRQ_HANDLED;
+}
+
+static void ipi_send(const struct cpumask *target, unsigned int ipi)
+{
+ __ipi_send_mask(ipi_desc[ipi], target);
+}
+
+static void ipi_setup(int cpu)
+{
+ int i;
+
+ if (!ipi_irq_base)
+ return;
+
+ for (i = 0; i < nr_ipi; i++)
+ enable_percpu_irq(ipi_irq_base + i, 0);
+}
+
+static void ipi_teardown(int cpu)
+{
+ int i;
+
+ if (!ipi_irq_base)
+ return;
+
+ for (i = 0; i < nr_ipi; i++)
+ disable_percpu_irq(ipi_irq_base + i);
+}
+
+void __init set_smp_ipi_range(int ipi_base, int n)
+{
+ int i;
+
+ WARN_ON(n < NR_IPI);
+ nr_ipi = min(n, NR_IPI);
+
+ for (i = 0; i < nr_ipi; i++) {
+ int err;
+
+ err = request_percpu_irq(ipi_base + i, ipi_handler,
+ "IPI", &irq_stat);
+ WARN_ON(err);
+
+ ipi_desc[i] = irq_to_desc(ipi_base + i);
+ irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ }
+
+ ipi_irq_base = ipi_base;
+ __smp_cross_call = ipi_send;
+
+ /* Setup the boot CPU immediately */
+ ipi_setup(smp_processor_id());
+}
+
void smp_send_reschedule(int cpu)
{
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
--
2.27.0
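
For reference, here is a rough sketch (not part of this patch) of how an
irqchip driver is expected to hand a block of SGIs over to the arch code
via set_smp_ipi_range(). The function name, domain pointer and fwspec
below are made up; the exact allocation call depends on the actual driver:

#include <linux/init.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/numa.h>
#include <asm/smp.h>

static void __init my_gic_init_ipis(struct irq_domain *domain,
				    struct irq_fwspec *sgi_fwspec)
{
	int base_sgi;

	/* Allocate 8 consecutive Linux interrupts backed by the SGIs */
	base_sgi = __irq_domain_alloc_irqs(domain, -1, 8, NUMA_NO_NODE,
					   sgi_fwspec, false, NULL);
	if (base_sgi <= 0)
		return;

	/* Hand them to the arm64 SMP code, which requests and enables them */
	set_smp_ipi_range(base_sgi, 8);
}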


2020-09-11 16:43:28

by Catalin Marinas

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Tue, Sep 01, 2020 at 03:43:11PM +0100, Marc Zyngier wrote:
> In order to deal with IPIs as normal interrupts, let's add
> a new way to register them with the architecture code.
>
> set_smp_ipi_range() takes a range of interrupts, and allows
> the arch code to request them as if the were normal interrupts.
> A standard handler is then called by the core IRQ code to deal
> with the IPI.
>
> This means that we don't need to call irq_enter/irq_exit, and
> that we don't need to deal with set_irq_regs either. So let's
> move the dispatcher into its own function, and leave handle_IPI()
> as a compatibility function.
>
> On the sending side, let's make use of ipi_send_mask, which
> already exists for this purpose.
>
> One of the major difference is that we end up, in some cases
> (such as when performing IRQ time accounting on the scheduler
> IPI), end up with nested irq_enter()/irq_exit() pairs.
> Other than the (relatively small) overhead, there should be
> no consequences to it (these pairs are designed to nest
> correctly, and the accounting shouldn't be off).
>
> Reviewed-by: Valentin Schneider <[email protected]>
> Signed-off-by: Marc Zyngier <[email protected]>

In case you need an ack for the arm64 part:

Acked-by: Catalin Marinas <[email protected]>

2020-10-19 15:46:01

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
>
> Hi Vincent,
>
> On 2020-10-19 13:42, Vincent Guittot wrote:
> > Hi Marc,
> >
> > On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <[email protected]> wrote:
> >>
> >> In order to deal with IPIs as normal interrupts, let's add
> >> a new way to register them with the architecture code.
> >>
> >> set_smp_ipi_range() takes a range of interrupts, and allows
> >> the arch code to request them as if the were normal interrupts.
> >> A standard handler is then called by the core IRQ code to deal
> >> with the IPI.
> >>
> >> This means that we don't need to call irq_enter/irq_exit, and
> >> that we don't need to deal with set_irq_regs either. So let's
> >> move the dispatcher into its own function, and leave handle_IPI()
> >> as a compatibility function.
> >>
> >> On the sending side, let's make use of ipi_send_mask, which
> >> already exists for this purpose.
> >>
> >> One of the major difference is that we end up, in some cases
> >> (such as when performing IRQ time accounting on the scheduler
> >> IPI), end up with nested irq_enter()/irq_exit() pairs.
> >> Other than the (relatively small) overhead, there should be
> >> no consequences to it (these pairs are designed to nest
> >> correctly, and the accounting shouldn't be off).
> >
> > While rebasing on mainline, I have faced a performance regression for
> > the benchmark:
> > perf bench sched pipe
> > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
> >
> > The regression comes from:
> > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> > interrupts")
>
> That's interesting, as this patch doesn't really change anything (most
> of the potential overhead comes in later). The only potential overhead
> I can see is that the scheduler_ipi() call is now wrapped around
> irq_enter()/irq_exit().
>
> >
> > v5.9 + this patch
> > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
> >
> > By + this patch, I mean merging branch from this patch. Whereas
> > merging the previous:
> > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> > /proc/interrupts")
> > It doesn't show any regression
>
> Since you are running perf, can you spot where the overhead occurs?

Hmm... difficult to say, because tracing the bench decreases the result
a lot. I have pasted the perf reports.

With this patch :

# Samples: 634 of event 'cpu-clock'
# Event count (approx.): 158500000
#
# Overhead Command Shared Object Symbol
# ........ .......... .................. ..................................
#
31.86% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
8.68% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irq
6.31% sched-pipe [kernel.kallsyms] [k] __schedule
5.21% sched-pipe [kernel.kallsyms] [k] schedule
4.73% sched-pipe [kernel.kallsyms] [k] pipe_read
3.31% sched-pipe [kernel.kallsyms] [k] el0_svc_common.constprop.3
2.84% sched-pipe [kernel.kallsyms] [k] ww_mutex_lock_interruptible
2.52% sched-pipe [kernel.kallsyms] [k] init_wait_entry
2.37% sched-pipe [kernel.kallsyms] [k] mutex_unlock
2.21% sched-pipe [kernel.kallsyms] [k] new_sync_read
1.89% sched-pipe [kernel.kallsyms] [k] new_sync_write
1.74% sched-pipe [kernel.kallsyms] [k] security_file_permission
1.74% sched-pipe [kernel.kallsyms] [k] vfs_read
1.58% sched-pipe [kernel.kallsyms] [k] __my_cpu_offset
1.26% sched-pipe libpthread-2.24.so [.] 0x0000000000010a2c
1.10% sched-pipe [kernel.kallsyms] [k] mutex_lock
1.10% sched-pipe [kernel.kallsyms] [k] vfs_write

After reverting this patch which gives a result similar to v5.9:

# Samples: 659 of event 'cpu-clock'
# Event count (approx.): 164750000
#
# Overhead Command Shared Object Symbol
# ........ .......... .................. ...............................
#
29.29% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
21.40% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irq
4.86% sched-pipe [kernel.kallsyms] [k] pipe_read
4.55% sched-pipe [kernel.kallsyms] [k] ww_mutex_lock_interruptible
2.88% sched-pipe [kernel.kallsyms] [k] __schedule
2.88% sched-pipe [kernel.kallsyms] [k] _raw_spin_lock_irqsave
2.88% sched-pipe [kernel.kallsyms] [k] schedule
2.12% sched-pipe [kernel.kallsyms] [k] new_sync_read
1.82% sched-pipe [kernel.kallsyms] [k] mutex_lock
1.67% sched-pipe [kernel.kallsyms] [k] el0_svc_common.constprop.3
1.67% sched-pipe [kernel.kallsyms] [k] pipe_write
1.21% sched-pipe [kernel.kallsyms] [k] rw_verify_area
1.21% sched-pipe [kernel.kallsyms] [k] security_file_permission
1.06% sched-pipe [kernel.kallsyms] [k] fsnotify

I have only included symbols with overhead above 1%.

So _raw_spin_unlock_irq, schedule and __schedule seem the most
impacted, but I can't draw any conclusion from that.

I can send you the perf.data files if you want.


>
> Thanks,
>
> M.
> --
> Jazz is not dead. It just smells funny...

2020-10-19 23:02:56

by Marc Zyngier

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

Hi Vincent,

On 2020-10-19 13:42, Vincent Guittot wrote:
> Hi Marc,
>
> On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <[email protected]> wrote:
>>
>> In order to deal with IPIs as normal interrupts, let's add
>> a new way to register them with the architecture code.
>>
>> set_smp_ipi_range() takes a range of interrupts, and allows
>> the arch code to request them as if the were normal interrupts.
>> A standard handler is then called by the core IRQ code to deal
>> with the IPI.
>>
>> This means that we don't need to call irq_enter/irq_exit, and
>> that we don't need to deal with set_irq_regs either. So let's
>> move the dispatcher into its own function, and leave handle_IPI()
>> as a compatibility function.
>>
>> On the sending side, let's make use of ipi_send_mask, which
>> already exists for this purpose.
>>
>> One of the major difference is that we end up, in some cases
>> (such as when performing IRQ time accounting on the scheduler
>> IPI), end up with nested irq_enter()/irq_exit() pairs.
>> Other than the (relatively small) overhead, there should be
>> no consequences to it (these pairs are designed to nest
>> correctly, and the accounting shouldn't be off).
>
> While rebasing on mainline, I have faced a performance regression for
> the benchmark:
> perf bench sched pipe
> on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
>
> The regression comes from:
> commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> interrupts")

That's interesting, as this patch doesn't really change anything (most
of the potential overhead comes in later). The only potential overhead
I can see is that the scheduler_ipi() call is now wrapped around
irq_enter()/irq_exit().
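
To make that concrete, here is a simplified sketch of the two dispatch
paths for the rescheduling IPI (illustration only, reconstructed from the
patch; the exact flow handler depends on the irqchip):

v5.9:
  gic_handle_irq()
    handle_IPI(IPI_RESCHEDULE, regs)
      scheduler_ipi()                  /* no irq_enter()/irq_exit() */

With IPIs handled as normal interrupts:
  gic_handle_irq()
    handle_domain_irq()
      irq_enter()                      /* new cost on this hot path */
      handle_percpu_devid_irq()
        ipi_handler()
          do_handle_IPI(IPI_RESCHEDULE)
            scheduler_ipi()
      irq_exit()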

>
> v5.9 + this patch
> hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
>
> By + this patch, I mean merging branch from this patch. Whereas
> merging the previous:
> commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> /proc/interrupts")
> It doesn't show any regression

Since you are running perf, can you spot where the overhead occurs?

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2020-10-20 01:01:07

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

Hi Marc,

On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <[email protected]> wrote:
>
> In order to deal with IPIs as normal interrupts, let's add
> a new way to register them with the architecture code.
>
> set_smp_ipi_range() takes a range of interrupts, and allows
> the arch code to request them as if the were normal interrupts.
> A standard handler is then called by the core IRQ code to deal
> with the IPI.
>
> This means that we don't need to call irq_enter/irq_exit, and
> that we don't need to deal with set_irq_regs either. So let's
> move the dispatcher into its own function, and leave handle_IPI()
> as a compatibility function.
>
> On the sending side, let's make use of ipi_send_mask, which
> already exists for this purpose.
>
> One of the major difference is that we end up, in some cases
> (such as when performing IRQ time accounting on the scheduler
> IPI), end up with nested irq_enter()/irq_exit() pairs.
> Other than the (relatively small) overhead, there should be
> no consequences to it (these pairs are designed to nest
> correctly, and the accounting shouldn't be off).

While rebasing on mainline, I have faced a performance regression for
the benchmark:
perf bench sched pipe
on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)

The regression comes from:
commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal interrupts")

          v5.9                + this patch
hikey :   48818 (+/- 0.31)    37503 (+/- 0.15%)    -23.2%
thx2  :   132410 (+/- 1.72)   122646 (+/- 1.92%)   -7.4%

By "+ this patch", I mean merging the branch up to and including this
patch. Merging only up to the previous commit:
commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
/proc/interrupts")
doesn't show any regression.

Vincent

>
> Reviewed-by: Valentin Schneider <[email protected]>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/smp.h | 5 ++
> arch/arm64/kernel/smp.c | 93 +++++++++++++++++++++++++++++++-----
> 3 files changed, 87 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 6d232837cbee..d0fdbe5fb32f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -106,6 +106,7 @@ config ARM64
> select GENERIC_CPU_VULNERABILITIES
> select GENERIC_EARLY_IOREMAP
> select GENERIC_IDLE_POLL_SETUP
> + select GENERIC_IRQ_IPI
> select GENERIC_IRQ_MULTI_HANDLER
> select GENERIC_IRQ_PROBE
> select GENERIC_IRQ_SHOW
> diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
> index 0eadbf933e35..57c5db15f6b7 100644
> --- a/arch/arm64/include/asm/smp.h
> +++ b/arch/arm64/include/asm/smp.h
> @@ -78,6 +78,11 @@ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
>
> extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);
>
> +/*
> + * Register IPI interrupts with the arch SMP code
> + */
> +extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
> +
> /*
> * Called from the secondary holding pen, this is the secondary CPU entry point.
> */
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 355ee9eed4dd..00c9db1b61b5 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -75,6 +75,13 @@ enum ipi_msg_type {
> IPI_WAKEUP
> };
>
> +static int ipi_irq_base __read_mostly;
> +static int nr_ipi __read_mostly = NR_IPI;
> +static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
> +
> +static void ipi_setup(int cpu);
> +static void ipi_teardown(int cpu);
> +
> #ifdef CONFIG_HOTPLUG_CPU
> static int op_cpu_kill(unsigned int cpu);
> #else
> @@ -237,6 +244,8 @@ asmlinkage notrace void secondary_start_kernel(void)
> */
> notify_cpu_starting(cpu);
>
> + ipi_setup(cpu);
> +
> store_cpu_topology(cpu);
> numa_add_cpu(cpu);
>
> @@ -302,6 +311,7 @@ int __cpu_disable(void)
> * and we must not schedule until we're ready to give up the cpu.
> */
> set_cpu_online(cpu, false);
> + ipi_teardown(cpu);
>
> /*
> * OK - migrate IRQs away from this CPU
> @@ -890,10 +900,9 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
> /*
> * Main handler for inter-processor interrupts
> */
> -void handle_IPI(int ipinr, struct pt_regs *regs)
> +static void do_handle_IPI(int ipinr)
> {
> unsigned int cpu = smp_processor_id();
> - struct pt_regs *old_regs = set_irq_regs(regs);
>
> if ((unsigned)ipinr < NR_IPI) {
> trace_ipi_entry_rcuidle(ipi_types[ipinr]);
> @@ -906,21 +915,16 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
> break;
>
> case IPI_CALL_FUNC:
> - irq_enter();
> generic_smp_call_function_interrupt();
> - irq_exit();
> break;
>
> case IPI_CPU_STOP:
> - irq_enter();
> local_cpu_stop();
> - irq_exit();
> break;
>
> case IPI_CPU_CRASH_STOP:
> if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
> - irq_enter();
> - ipi_cpu_crash_stop(cpu, regs);
> + ipi_cpu_crash_stop(cpu, get_irq_regs());
>
> unreachable();
> }
> @@ -928,17 +932,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>
> #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
> case IPI_TIMER:
> - irq_enter();
> tick_receive_broadcast();
> - irq_exit();
> break;
> #endif
>
> #ifdef CONFIG_IRQ_WORK
> case IPI_IRQ_WORK:
> - irq_enter();
> irq_work_run();
> - irq_exit();
> break;
> #endif
>
> @@ -957,9 +957,78 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>
> if ((unsigned)ipinr < NR_IPI)
> trace_ipi_exit_rcuidle(ipi_types[ipinr]);
> +}
> +
> +/* Legacy version, should go away once all irqchips have been converted */
> +void handle_IPI(int ipinr, struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + irq_enter();
> + do_handle_IPI(ipinr);
> + irq_exit();
> +
> set_irq_regs(old_regs);
> }
>
> +static irqreturn_t ipi_handler(int irq, void *data)
> +{
> + do_handle_IPI(irq - ipi_irq_base);
> + return IRQ_HANDLED;
> +}
> +
> +static void ipi_send(const struct cpumask *target, unsigned int ipi)
> +{
> + __ipi_send_mask(ipi_desc[ipi], target);
> +}
> +
> +static void ipi_setup(int cpu)
> +{
> + int i;
> +
> + if (!ipi_irq_base)
> + return;
> +
> + for (i = 0; i < nr_ipi; i++)
> + enable_percpu_irq(ipi_irq_base + i, 0);
> +}
> +
> +static void ipi_teardown(int cpu)
> +{
> + int i;
> +
> + if (!ipi_irq_base)
> + return;
> +
> + for (i = 0; i < nr_ipi; i++)
> + disable_percpu_irq(ipi_irq_base + i);
> +}
> +
> +void __init set_smp_ipi_range(int ipi_base, int n)
> +{
> + int i;
> +
> + WARN_ON(n < NR_IPI);
> + nr_ipi = min(n, NR_IPI);
> +
> + for (i = 0; i < nr_ipi; i++) {
> + int err;
> +
> + err = request_percpu_irq(ipi_base + i, ipi_handler,
> + "IPI", &irq_stat);
> + WARN_ON(err);
> +
> + ipi_desc[i] = irq_to_desc(ipi_base + i);
> + irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
> + }
> +
> + ipi_irq_base = ipi_base;
> + __smp_cross_call = ipi_send;
> +
> + /* Setup the boot CPU immediately */
> + ipi_setup(smp_processor_id());
> +}
> +
> void smp_send_reschedule(int cpu)
> {
> smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
> --
> 2.27.0
>

2020-10-20 02:31:19

by Valentin Schneider

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

Hi,

On 19/10/20 16:43, Vincent Guittot wrote:
> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
>> Since you are running perf, can you spot where the overhead occurs?
>
> hmm... Difficult to say because tracing the bench decreases a lot the
> result. I have pasted the perf reports.
>

<snip>

> I have only put symbol with overhead above 1%
>
> so _raw_spin_unlock_irq, schedule and __schedule seem the most
> impacted but i can't get any conclusion
>

AFAICT on TX2 you should be able to run these and get some more details
within IRQ-disabled regions:

https://lore.kernel.org/linux-arm-kernel/[email protected]/

(they should be on linux-next)

> I can sent you perf.data files if you want
>
>
>>
>> Thanks,
>>
>> M.
>> --
>> Jazz is not dead. It just smells funny...

2020-10-27 14:39:34

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
<[email protected]> wrote:
>
> On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <[email protected]> wrote:
> >
> > On 2020-10-27 10:12, Vincent Guittot wrote:
> > > HI Marc,
> > >
> > > On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> > > <[email protected]> wrote:
> > >>
> > >> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
> > >> >
> > >
> > > ...
> > >
> > >> > >>
> > >> > >> One of the major difference is that we end up, in some cases
> > >> > >> (such as when performing IRQ time accounting on the scheduler
> > >> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
> > >> > >> Other than the (relatively small) overhead, there should be
> > >> > >> no consequences to it (these pairs are designed to nest
> > >> > >> correctly, and the accounting shouldn't be off).
> > >> > >
> > >> > > While rebasing on mainline, I have faced a performance regression for
> > >> > > the benchmark:
> > >> > > perf bench sched pipe
> > >> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
> > >> > >
> > >> > > The regression comes from:
> > >> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> > >> > > interrupts")
> > >> >
> > >> > That's interesting, as this patch doesn't really change anything (most
> > >> > of the potential overhead comes in later). The only potential overhead
> > >> > I can see is that the scheduler_ipi() call is now wrapped around
> > >> > irq_enter()/irq_exit().
> > >> >
> > >> > >
> > >> > > v5.9 + this patch
> > >> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> > >> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
> > >> > >
> > >> > > By + this patch, I mean merging branch from this patch. Whereas
> > >> > > merging the previous:
> > >> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> > >> > > /proc/interrupts")
> > >> > > It doesn't show any regression
> > >> >
> > >> > Since you are running perf, can you spot where the overhead occurs?
> > >
> > > Any idea about the root cause of the regression ?
> > > I have faced it on more arm64 platforms in the meantime
> >
> > two possible causes:
> >
> > (1) irq_enter/exit on the rescheduling IPI means we reschedule much more
> > often
> > (2) irq_domain lookups add some overhead.
> >
> > For (1), I have this series[1] which is ugly as sin and needs much more
> > testing.
>
> Ok, I'm going to test this series to see if it fixes the perf regression

You have spotted the root cause of the regression. We are back to ~1%
performance diff on the hikey

>
> >
> > For (2), I have some ideas which need more work (let the irq domain
> > resolve to
> > an irq_desc instead of an interrupt number, avoiding another radix-tree
> > lookup).
> >
> > M.
> >
> > [1]
> > https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
> > --
> > Jazz is not dead. It just smells funny...

2020-10-27 14:53:34

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Tue, 27 Oct 2020 at 13:06, Marc Zyngier <[email protected]> wrote:
>
> On 2020-10-27 11:21, Vincent Guittot wrote:
> > On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
> > <[email protected]> wrote:
> >>
> >> On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <[email protected]> wrote:
> >> >
> >> > On 2020-10-27 10:12, Vincent Guittot wrote:
> >> > > HI Marc,
> >> > >
> >> > > On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> >> > > <[email protected]> wrote:
> >> > >>
> >> > >> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
> >> > >> >
> >> > >
> >> > > ...
> >> > >
> >> > >> > >>
> >> > >> > >> One of the major difference is that we end up, in some cases
> >> > >> > >> (such as when performing IRQ time accounting on the scheduler
> >> > >> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
> >> > >> > >> Other than the (relatively small) overhead, there should be
> >> > >> > >> no consequences to it (these pairs are designed to nest
> >> > >> > >> correctly, and the accounting shouldn't be off).
> >> > >> > >
> >> > >> > > While rebasing on mainline, I have faced a performance regression for
> >> > >> > > the benchmark:
> >> > >> > > perf bench sched pipe
> >> > >> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
> >> > >> > >
> >> > >> > > The regression comes from:
> >> > >> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> >> > >> > > interrupts")
> >> > >> >
> >> > >> > That's interesting, as this patch doesn't really change anything (most
> >> > >> > of the potential overhead comes in later). The only potential overhead
> >> > >> > I can see is that the scheduler_ipi() call is now wrapped around
> >> > >> > irq_enter()/irq_exit().
> >> > >> >
> >> > >> > >
> >> > >> > > v5.9 + this patch
> >> > >> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> >> > >> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
> >> > >> > >
> >> > >> > > By + this patch, I mean merging branch from this patch. Whereas
> >> > >> > > merging the previous:
> >> > >> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> >> > >> > > /proc/interrupts")
> >> > >> > > It doesn't show any regression
> >> > >> >
> >> > >> > Since you are running perf, can you spot where the overhead occurs?
> >> > >
> >> > > Any idea about the root cause of the regression ?
> >> > > I have faced it on more arm64 platforms in the meantime
> >> >
> >> > two possible causes:
> >> >
> >> > (1) irq_enter/exit on the rescheduling IPI means we reschedule much more
> >> > often
> >> > (2) irq_domain lookups add some overhead.
> >> >
> >> > For (1), I have this series[1] which is ugly as sin and needs much more
> >> > testing.
> >>
> >> Ok, I'm going to test this series to see if it fixes the perf
> >> regression
> >
> > You have spotted the root cause of the regression. We are back to ~1%
> > performance diff on the hikey
>
> Yeah. Only thing is that I can't look at this hack without vomiting...

At least, we know the root cause and the impact of irq_enter/exit
>
> M.
> --
> Jazz is not dead. It just smells funny...

2020-10-28 06:45:39

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

Hi Marc,

On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
<[email protected]> wrote:
>
> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
> >

...

> > >>
> > >> One of the major difference is that we end up, in some cases
> > >> (such as when performing IRQ time accounting on the scheduler
> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
> > >> Other than the (relatively small) overhead, there should be
> > >> no consequences to it (these pairs are designed to nest
> > >> correctly, and the accounting shouldn't be off).
> > >
> > > While rebasing on mainline, I have faced a performance regression for
> > > the benchmark:
> > > perf bench sched pipe
> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
> > >
> > > The regression comes from:
> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> > > interrupts")
> >
> > That's interesting, as this patch doesn't really change anything (most
> > of the potential overhead comes in later). The only potential overhead
> > I can see is that the scheduler_ipi() call is now wrapped around
> > irq_enter()/irq_exit().
> >
> > >
> > > v5.9 + this patch
> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
> > >
> > > By + this patch, I mean merging branch from this patch. Whereas
> > > merging the previous:
> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> > > /proc/interrupts")
> > > It doesn't show any regression
> >
> > Since you are running perf, can you spot where the overhead occurs?

Any idea about the root cause of the regression?
I have hit it on more arm64 platforms in the meantime.

>
> hmm... Difficult to say because tracing the bench decreases a lot the
> result. I have pasted the perf reports.
>
> With this patch :
>
> # Samples: 634 of event 'cpu-clock'
> # Event count (approx.): 158500000
> #
> # Overhead Command Shared Object Symbol
> # ........ .......... .................. ..................................
> #
> 31.86% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
> 8.68% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irq
> 6.31% sched-pipe [kernel.kallsyms] [k] __schedule
> 5.21% sched-pipe [kernel.kallsyms] [k] schedule
> 4.73% sched-pipe [kernel.kallsyms] [k] pipe_read
> 3.31% sched-pipe [kernel.kallsyms] [k] el0_svc_common.constprop.3
> 2.84% sched-pipe [kernel.kallsyms] [k] ww_mutex_lock_interruptible
> 2.52% sched-pipe [kernel.kallsyms] [k] init_wait_entry
> 2.37% sched-pipe [kernel.kallsyms] [k] mutex_unlock
> 2.21% sched-pipe [kernel.kallsyms] [k] new_sync_read
> 1.89% sched-pipe [kernel.kallsyms] [k] new_sync_write
> 1.74% sched-pipe [kernel.kallsyms] [k] security_file_permission
> 1.74% sched-pipe [kernel.kallsyms] [k] vfs_read
> 1.58% sched-pipe [kernel.kallsyms] [k] __my_cpu_offset
> 1.26% sched-pipe libpthread-2.24.so [.] 0x0000000000010a2c
> 1.10% sched-pipe [kernel.kallsyms] [k] mutex_lock
> 1.10% sched-pipe [kernel.kallsyms] [k] vfs_write
>
> After reverting this patch which gives a result similar to v5.9:
>
> # Samples: 659 of event 'cpu-clock'
> # Event count (approx.): 164750000
> #
> # Overhead Command Shared Object Symbol
> # ........ .......... .................. ...............................
> #
> 29.29% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
> 21.40% sched-pipe [kernel.kallsyms] [k] _raw_spin_unlock_irq
> 4.86% sched-pipe [kernel.kallsyms] [k] pipe_read
> 4.55% sched-pipe [kernel.kallsyms] [k] ww_mutex_lock_interruptible
> 2.88% sched-pipe [kernel.kallsyms] [k] __schedule
> 2.88% sched-pipe [kernel.kallsyms] [k] _raw_spin_lock_irqsave
> 2.88% sched-pipe [kernel.kallsyms] [k] schedule
> 2.12% sched-pipe [kernel.kallsyms] [k] new_sync_read
> 1.82% sched-pipe [kernel.kallsyms] [k] mutex_lock
> 1.67% sched-pipe [kernel.kallsyms] [k] el0_svc_common.constprop.3
> 1.67% sched-pipe [kernel.kallsyms] [k] pipe_write
> 1.21% sched-pipe [kernel.kallsyms] [k] rw_verify_area
> 1.21% sched-pipe [kernel.kallsyms] [k] security_file_permission
> 1.06% sched-pipe [kernel.kallsyms] [k] fsnotify
>
> I have only put symbol with overhead above 1%
>
> so _raw_spin_unlock_irq, schedule and __schedule seem the most
> impacted but i can't get any conclusion
>
> I can sent you perf.data files if you want
>
>
> >
> > Thanks,
> >
> > M.
> > --
> > Jazz is not dead. It just smells funny...

2020-10-28 06:54:50

by Marc Zyngier

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On 2020-10-27 10:12, Vincent Guittot wrote:
> HI Marc,
>
> On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> <[email protected]> wrote:
>>
>> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
>> >
>
> ...
>
>> > >>
>> > >> One of the major difference is that we end up, in some cases
>> > >> (such as when performing IRQ time accounting on the scheduler
>> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
>> > >> Other than the (relatively small) overhead, there should be
>> > >> no consequences to it (these pairs are designed to nest
>> > >> correctly, and the accounting shouldn't be off).
>> > >
>> > > While rebasing on mainline, I have faced a performance regression for
>> > > the benchmark:
>> > > perf bench sched pipe
>> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
>> > >
>> > > The regression comes from:
>> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
>> > > interrupts")
>> >
>> > That's interesting, as this patch doesn't really change anything (most
>> > of the potential overhead comes in later). The only potential overhead
>> > I can see is that the scheduler_ipi() call is now wrapped around
>> > irq_enter()/irq_exit().
>> >
>> > >
>> > > v5.9 + this patch
>> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
>> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
>> > >
>> > > By + this patch, I mean merging branch from this patch. Whereas
>> > > merging the previous:
>> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
>> > > /proc/interrupts")
>> > > It doesn't show any regression
>> >
>> > Since you are running perf, can you spot where the overhead occurs?
>
> Any idea about the root cause of the regression ?
> I have faced it on more arm64 platforms in the meantime

two possible causes:

(1) irq_enter/exit on the rescheduling IPI means we reschedule much more
often
(2) irq_domain lookups add some overhead.

For (1), I have this series[1] which is ugly as sin and needs much more
testing.

For (2), I have some ideas which need more work (let the irq domain
resolve to an irq_desc instead of an interrupt number, avoiding another
radix-tree lookup).
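
To make the double lookup concrete, here is a sketch of the resolution
path as it stands today (illustration only; handle_domain_desc() is a
made-up name, built from the existing irq_find_mapping()/irq_to_desc()
helpers):

#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

static int handle_domain_desc(struct irq_domain *domain, irq_hw_number_t hwirq)
{
	unsigned int virq;
	struct irq_desc *desc;

	/* First lookup: hwirq -> Linux irq number (domain revmap/radix tree) */
	virq = irq_find_mapping(domain, hwirq);

	/*
	 * Second lookup: irq number -> irq_desc (another radix tree when
	 * SPARSE_IRQ is enabled). The idea is to let the domain hand back
	 * the irq_desc directly and drop this step.
	 */
	desc = irq_to_desc(virq);
	if (!desc)
		return -EINVAL;

	generic_handle_irq_desc(desc);
	return 0;
}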

M.

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
--
Jazz is not dead. It just smells funny...

2020-10-28 06:58:53

by Vincent Guittot

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <[email protected]> wrote:
>
> On 2020-10-27 10:12, Vincent Guittot wrote:
> > HI Marc,
> >
> > On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> > <[email protected]> wrote:
> >>
> >> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
> >> >
> >
> > ...
> >
> >> > >>
> >> > >> One of the major difference is that we end up, in some cases
> >> > >> (such as when performing IRQ time accounting on the scheduler
> >> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
> >> > >> Other than the (relatively small) overhead, there should be
> >> > >> no consequences to it (these pairs are designed to nest
> >> > >> correctly, and the accounting shouldn't be off).
> >> > >
> >> > > While rebasing on mainline, I have faced a performance regression for
> >> > > the benchmark:
> >> > > perf bench sched pipe
> >> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
> >> > >
> >> > > The regression comes from:
> >> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> >> > > interrupts")
> >> >
> >> > That's interesting, as this patch doesn't really change anything (most
> >> > of the potential overhead comes in later). The only potential overhead
> >> > I can see is that the scheduler_ipi() call is now wrapped around
> >> > irq_enter()/irq_exit().
> >> >
> >> > >
> >> > > v5.9 + this patch
> >> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
> >> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
> >> > >
> >> > > By + this patch, I mean merging branch from this patch. Whereas
> >> > > merging the previous:
> >> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> >> > > /proc/interrupts")
> >> > > It doesn't show any regression
> >> >
> >> > Since you are running perf, can you spot where the overhead occurs?
> >
> > Any idea about the root cause of the regression ?
> > I have faced it on more arm64 platforms in the meantime
>
> two possible causes:
>
> (1) irq_enter/exit on the rescheduling IPI means we reschedule much more
> often
> (2) irq_domain lookups add some overhead.
>
> For (1), I have this series[1] which is ugly as sin and needs much more
> testing.

Ok, I'm going to test this series to see if it fixes the perf regression

>
> For (2), I have some ideas which need more work (let the irq domain
> resolve to
> an irq_desc instead of an interrupt number, avoiding another radix-tree
> lookup).
>
> M.
>
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
> --
> Jazz is not dead. It just smells funny...

2020-10-28 07:22:04

by Marc Zyngier

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On 2020-10-27 11:21, Vincent Guittot wrote:
> On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
> <[email protected]> wrote:
>>
>> On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <[email protected]> wrote:
>> >
>> > On 2020-10-27 10:12, Vincent Guittot wrote:
>> > > HI Marc,
>> > >
>> > > On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
>> > > <[email protected]> wrote:
>> > >>
>> > >> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <[email protected]> wrote:
>> > >> >
>> > >
>> > > ...
>> > >
>> > >> > >>
>> > >> > >> One of the major difference is that we end up, in some cases
>> > >> > >> (such as when performing IRQ time accounting on the scheduler
>> > >> > >> IPI), end up with nested irq_enter()/irq_exit() pairs.
>> > >> > >> Other than the (relatively small) overhead, there should be
>> > >> > >> no consequences to it (these pairs are designed to nest
>> > >> > >> correctly, and the accounting shouldn't be off).
>> > >> > >
>> > >> > > While rebasing on mainline, I have faced a performance regression for
>> > >> > > the benchmark:
>> > >> > > perf bench sched pipe
>> > >> > > on my arm64 dual quad core (hikey) and my 2 nodes x 112 CPUS (thx2)
>> > >> > >
>> > >> > > The regression comes from:
>> > >> > > commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
>> > >> > > interrupts")
>> > >> >
>> > >> > That's interesting, as this patch doesn't really change anything (most
>> > >> > of the potential overhead comes in later). The only potential overhead
>> > >> > I can see is that the scheduler_ipi() call is now wrapped around
>> > >> > irq_enter()/irq_exit().
>> > >> >
>> > >> > >
>> > >> > > v5.9 + this patch
>> > >> > > hikey : 48818(+/- 0.31) 37503(+/- 0.15%) -23.2%
>> > >> > > thx2 : 132410(+/- 1.72) 122646(+/- 1.92%) -7.4%
>> > >> > >
>> > >> > > By + this patch, I mean merging branch from this patch. Whereas
>> > >> > > merging the previous:
>> > >> > > commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
>> > >> > > /proc/interrupts")
>> > >> > > It doesn't show any regression
>> > >> >
>> > >> > Since you are running perf, can you spot where the overhead occurs?
>> > >
>> > > Any idea about the root cause of the regression ?
>> > > I have faced it on more arm64 platforms in the meantime
>> >
>> > two possible causes:
>> >
>> > (1) irq_enter/exit on the rescheduling IPI means we reschedule much more
>> > often
>> > (2) irq_domain lookups add some overhead.
>> >
>> > For (1), I have this series[1] which is ugly as sin and needs much more
>> > testing.
>>
>> Ok, I'm going to test this series to see if it fixes the perf
>> regression
>
> You have spotted the root cause of the regression. We are back to ~1%
> performance diff on the hikey

Yeah. Only thing is that I can't look at this hack without vomiting...

M.
--
Jazz is not dead. It just smells funny...

2021-05-06 18:42:52

by Marc Zyngier

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Thu, 06 May 2021 08:50:42 +0100,
He Ying <[email protected]> wrote:
>
> Hello Marc,
>
> We have faced a performance regression for handling ipis since this
> commit. I think it's the same issue reported by Vincent.

Can you share more details on what regression you have observed?
What's the workload, the system, the performance drop?

> I found you pointed out the possible two causes:
>
> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
> more often.

It turned out to be a red herring. We don't reschedule more often, but
we instead suffer from the overhead of irq_enter()/irq_exit().
However, this only matters for silly benchmarks, and no real-life
workload has shown any significant regression. Have you identified such a
realistic workload?

> (2) irq_domain lookups add some overhead.

While this is also a potential source of overhead, it turned out not
to be the case.

> But I don't see any following patches in mainline. So, are you still
> working on this issue? Looking forward to your reply.

See [1]. However, there are probably better things to do than this
low-level specialisation of IPIs, and Thomas outlined what needs to be
done (see v1 of the patch series).

Thanks,

M.

[1] https://lore.kernel.org/lkml/[email protected]/

--
Without deviation from the norm, progress is not possible.

2021-05-07 08:49:07

by He Ying

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts


On 2021/5/6 19:44, Marc Zyngier wrote:
> On Thu, 06 May 2021 08:50:42 +0100,
> He Ying <[email protected]> wrote:
>> Hello Marc,
>>
>> We have faced a performance regression for handling ipis since this
>> commit. I think it's the same issue reported by Vincent.
> Can you share more details on what regression you have observed?
> What's the workload, the system, the performance drop?

OK. We have just calculated the PMU cycles from the entry of gic_handle_irq
to the entry of do_handle_ipi. Here is some more information about our test:

CPU: Hisilicon hip05-d02

Applying the patch series:  1115 cycles
Reverting the patch series:  599 cycles
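
For illustration, a guess at how such a measurement could be instrumented
(not our actual test code): snapshot the PMU cycle counter at both entry
points and log the delta.

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <asm/sysreg.h>

/* Assumes the cycle counter is enabled (PMCR_EL0.E, PMCNTENSET_EL0.C) */
static DEFINE_PER_CPU(u64, ipi_entry_cycles);

/* called at the entry of gic_handle_irq() */
static inline void ipi_cycles_start(void)
{
	__this_cpu_write(ipi_entry_cycles, read_sysreg(pmccntr_el0));
}

/* called at the entry of do_handle_IPI() */
static inline void ipi_cycles_stop(void)
{
	u64 delta = read_sysreg(pmccntr_el0) - __this_cpu_read(ipi_entry_cycles);

	trace_printk("gic_handle_irq -> do_handle_IPI: %llu cycles\n",
		     (unsigned long long)delta);
}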

>
>> I found you pointed out the possible two causes:
>>
>> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
>> more often.
> It turned out to be a red herring. We don't reschedule more often, but
> we instead suffer from the overhead of irq_enter()/irq_exit().
> However, this only matters for silly benchmarks, and no real-life
> workload showed any significant regression. Have you identified such
> realistic workload?

I'm afraid not. We just ran some benchmarks and calculated PMU cycle
counters. But we have observed that the running time from the entry of
gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
that affect realistic workloads?

>
>> (2) irq_domain lookups add some overhead.
> While this is also a potential source of overhead, it turned out not
> to be the case.
OK.
>
>> But I don't see any following patches in mainline. So, are you still
>> working on this issue? Looking forward to your reply.
> See [1]. However, there is probably better things to do than this
> low-level specialisation of IPIs, and Thomas outlined what needs to be
> done (see v1 of the patch series).

OK. I see the patch series. Will it be applied to mainline someday?
I notice that more than 5 months have passed since you sent it.


Thanks.

>
> Thanks,
>
> M.
>
> [1] https://lore.kernel.org/lkml/[email protected]/
>

2021-05-07 09:48:52

by Marc Zyngier

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts

On Fri, 07 May 2021 08:30:06 +0100,
He Ying <[email protected]> wrote:
>
>
On 2021/5/6 19:44, Marc Zyngier wrote:
> > On Thu, 06 May 2021 08:50:42 +0100,
> > He Ying <[email protected]> wrote:
> >> Hello Marc,
> >>
> >> We have faced a performance regression for handling ipis since this
> >> commit. I think it's the same issue reported by Vincent.
> > Can you share more details on what regression you have observed?
> > What's the workload, the system, the performance drop?
>
> OK. We have just calculated the pmu cycles from the entry of gic_handle_irq
> to the entry of do_handle_ipi. Here is some more information about our test:
>
> CPU: Hisilicon hip05-d02
>
> Applying the patch series: 1115 cycles
> Reverting the patch series: 599 cycles

And? How is that meaningful? Interrupts are pretty rare compared to
everything that happens in the system. How does it affect the
behaviour of the system as a whole?

>
> >
> >> I found you pointed out the possible two causes:
> >>
> >> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
> >> more often.
> > It turned out to be a red herring. We don't reschedule more often, but
> > we instead suffer from the overhead of irq_enter()/irq_exit().
> > However, this only matters for silly benchmarks, and no real-life
> > workload showed any significant regression. Have you identified such
> > realistic workload?
>
> I'm afraid not. We just run some benchmarks and calculated pmu cycle
> counters. But we have observed running time from the entry of
> gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
> it affect realistic workload?

Then I'm not that interested. Show me an actual regression in a real
workload that affects people, and I'll be a bit more sympathetic to
your complaint. But quoting raw numbers does not help.

There are a number of advantages to having IPIs as IRQs, as it allows us
to deal with proper allocation (other subsystems want to use IPIs), and
eventually NMIs. There is a trade-off, and if that means wasting a few
cycles, so be it.

> >> (2) irq_domain lookups add some overhead.
> > While this is also a potential source of overhead, it turned out not
> > to be the case.
> OK.
> >
> >> But I don't see any following patches in mainline. So, are you still
> >> working on this issue? Looking forward to your reply.
> > See [1]. However, there is probably better things to do than this
> > low-level specialisation of IPIs, and Thomas outlined what needs to be
> > done (see v1 of the patch series).
>
> OK. I see the patch series. Would it be applied to the mainline
> someday? I notice that more than 5 months have passed since you sent
> the patch series.

I have no plan to merge these patches any time soon, given that nobody
has shown a measurable regression using something other than a trivial
benchmark. If you come up with such an example, I will of course
reconsider this position.

Thanks,

M.

--
Without deviation from the norm, progress is not possible.

2021-05-07 12:27:11

by He Ying

Subject: Re: [PATCH v3 03/16] arm64: Allow IPIs to be handled as normal interrupts


On 2021/5/7 16:56, Marc Zyngier wrote:
> On Fri, 07 May 2021 08:30:06 +0100,
> He Ying <[email protected]> wrote:
>>
>> 在 2021/5/6 19:44, Marc Zyngier 写道:
>>> On Thu, 06 May 2021 08:50:42 +0100,
>>> He Ying <[email protected]> wrote:
>>>> Hello Marc,
>>>>
>>>> We have faced a performance regression for handling ipis since this
>>>> commit. I think it's the same issue reported by Vincent.
>>> Can you share more details on what regression you have observed?
>>> What's the workload, the system, the performance drop?
>> OK. We have just calculated the pmu cycles from the entry of gic_handle_irq
>> to the entry of do_handle_ipi. Here is some more information about our test:
>>
>> CPU: Hisilicon hip05-d02
>>
>> Applying the patch series: 1115 cycles
>> Reverting the patch series: 599 cycles
> And? How is that meaningful? Interrupts are pretty rare compared to
> everything that happens in the system. How does it affect the
> behaviour of the system as a whole?
OK.
>
>>>> I found you pointed out the possible two causes:
>>>>
>>>> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
>>>> more often.
>>> It turned out to be a red herring. We don't reschedule more often, but
>>> we instead suffer from the overhead of irq_enter()/irq_exit().
>>> However, this only matters for silly benchmarks, and no real-life
>>> workload showed any significant regression. Have you identified such
>>> realistic workload?
>> I'm afraid not. We just run some benchmarks and calculated pmu cycle
>> counters. But we have observed running time from the entry of
>> gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
>> it affect realistic workload?
> Then I'm not that interested. Show me an actual regression in a real
> workload that affects people, and I'll be a bit more sympathetic to
> your complain. But quoting raw numbers do not help.
>
> There is a number of advantages to having IPI as IRQs, as it allows us
> to deal with proper allocation (other subsystem want to use IPIs), and
> eventually NMIs. There is a trade-off, and if that means wasting a few
> cycles, so be it.
OK. I see.
>
>>>> (2) irq_domain lookups add some overhead.
>>> While this is also a potential source of overhead, it turned out not
>>> to be the case.
>> OK.
>>>> But I don't see any following patches in mainline. So, are you still
>>>> working on this issue? Looking forward to your reply.
>>> See [1]. However, there is probably better things to do than this
>>> low-level specialisation of IPIs, and Thomas outlined what needs to be
>>> done (see v1 of the patch series).
>> OK. I see the patch series. Would it be applied to the mainline
>> someday? I notice that more than 5 months have passed since you sent
>> the patch series.
> I have no plan to merge these patches any time soon, given that nobody
> has shown a measurable regression using something other than a trivial
> benchmark. If you come up with such an example, I will of course
> reconsider this position.

OK. Thanks a lot for all your replies. If I come up with a measurable
regression with a realistic workload, I'll contact you again.

Thanks.

>
> Thanks,
>
> M.
>