Enable support for IPI_CPU_CRASH_STOP to be a pseudo-NMI
This patch set enables the IPI_CPU_CRASH_STOP IPI to be delivered as a pseudo-NMI.
This allows kdump to collect system information even when the CPU is in
a HARDLOCKUP state.
Only IPI_CPU_CRASH_STOP uses NMI and the other IPIs remain normal IRQs.
The patch has been tested on ThunderX.
This patch set is based on Marc's latest IPI patch set [1].
It also uses part of Sumit's IPI patch set for NMI [2].
[1] https://lore.kernel.org/linux-arm-kernel/[email protected]/
[2] https://lore.kernel.org/linux-arm-kernel/[email protected]/
$ echo 1 > /proc/sys/kernel/panic_on_rcu_stall
$ echo HARDLOCKUP > /sys/kernel/debug/provoke-crash/DIRECT
: the kernel panics and the crash kernel boots
: makedumpfile saves the system state at HARDLOCKUP in vmcore.
crash utility:
crash> bt
PID: 3213 TASK: fffffd001adc5940 CPU: 8 COMMAND: "bash"
#0 [fffffe0022fefcf0] lkdtm_HARDLOCKUP at fffffe0010888ab4
#1 [fffffe0022fefd10] lkdtm_do_action at fffffe00108882bc
#2 [fffffe0022fefd20] direct_entry at fffffe0010888720
#3 [fffffe0022fefd70] full_proxy_write at fffffe001058cfe4
#4 [fffffe0022fefdb0] vfs_write at fffffe00104a4c2c
#5 [fffffe0022fefdf0] ksys_write at fffffe00104a4f0c
#6 [fffffe0022fefe40] __arm64_sys_write at fffffe00104a4fbc
#7 [fffffe0022fefe50] el0_svc_common.constprop.0 at fffffe0010159e38
#8 [fffffe0022fefe80] do_el0_svc at fffffe0010159fa0
#9 [fffffe0022fefe90] el0_svc at fffffe00101481d0
#10 [fffffe0022fefea0] el0_sync_handler at fffffe00101484b4
#11 [fffffe0022fefff0] el0_sync at fffffe0010142b7c
Sumit Garg (1):
irqchip/gic-v3: Enable support for SGIs to act as NMIs
Yuichi Ito (1):
Register IPI_CPU_CRASH_STOP IPI as pseudo-NMI
arch/arm64/kernel/smp.c | 39 ++++++++++++++++++++++++++++--------
drivers/irqchip/irq-gic-v3.c | 13 ++++++++++--
2 files changed, 42 insertions(+), 10 deletions(-)
--
2.25.1
Register IPI_CPU_CRASH_STOP IPI as pseudo-NMI.
For systems that do not support pseudo-NMI, register as a normal IRQ.
Signed-off-by: Yuichi Ito <[email protected]>
---
arch/arm64/kernel/smp.c | 39 +++++++++++++++++++++++++++++++--------
1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b6bde2675ccc..d929dd7221ff 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -79,6 +79,8 @@ enum ipi_msg_type {
static int ipi_irq_base __read_mostly;
static int nr_ipi __read_mostly = NR_IPI;
static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+static int ipi_crash_stop = -1;
+static int ipi_crash_stop_enable_nmi;
static void ipi_setup(int cpu);
static void ipi_teardown(int cpu);
@@ -954,8 +956,16 @@ static void ipi_setup(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- enable_percpu_irq(ipi_irq_base + i, 0);
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_irq_base + i == ipi_crash_stop) {
+ if (!prepare_percpu_nmi(ipi_irq_base + i)) {
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ ipi_crash_stop_enable_nmi = 1;
+ } else
+ pr_crit("CPU%u: IPI_CPU_CRASH_STOP cannot be enabled NMI.\n", cpu);
+ } else
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
}
static void ipi_teardown(int cpu)
@@ -965,23 +975,37 @@ static void ipi_teardown(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- disable_percpu_irq(ipi_irq_base + i);
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_irq_base + i == ipi_crash_stop) {
+ if (ipi_crash_stop_enable_nmi) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ }
+ } else
+ disable_percpu_irq(ipi_irq_base + i);
+ }
}
void __init set_smp_ipi_range(int ipi_base, int n)
{
- int i;
+ int i, ret;
WARN_ON(n < NR_IPI);
nr_ipi = min(n, NR_IPI);
+ ret = request_percpu_nmi(ipi_base + IPI_CPU_CRASH_STOP,
+ ipi_handler, "IPI", &cpu_number);
+ if (!ret)
+ ipi_crash_stop = ipi_base + IPI_CPU_CRASH_STOP;
+
for (i = 0; i < nr_ipi; i++) {
int err;
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
- WARN_ON(err);
+ if (ipi_base + i != ipi_crash_stop) {
+ err = request_percpu_irq(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN_ON(err);
+ }
ipi_desc[i] = irq_to_desc(ipi_base + i);
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
--
2.25.1
From: Sumit Garg <[email protected]>
Add support to handle SGIs as regular NMIs. As SGIs or IPIs default to a
special flow handler, handle_percpu_devid_fasteoi_ipi(), skip the NMI
handler update in case of SGIs.
Also, enable NMI support prior to gic_smp_init(), as allocation of SGIs
as IRQs/NMIs happens as part of this routine.
Signed-off-by: Sumit Garg <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 262bc7377abd..2eda18d1df59 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -477,6 +477,11 @@ static int gic_irq_nmi_setup(struct irq_data *d)
if (WARN_ON(gic_irq(d) >= 8192))
return -EINVAL;
+ if (get_intid_range(d) == SGI_RANGE) {
+ gic_irq_set_prio(d, GICD_INT_NMI_PRI);
+ return 0;
+ }
+
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
@@ -514,6 +519,11 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
if (WARN_ON(gic_irq(d) >= 8192))
return;
+ if (get_intid_range(d) == SGI_RANGE) {
+ gic_irq_set_prio(d, GICD_INT_DEF_PRI);
+ return;
+ }
+
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
@@ -1708,6 +1718,7 @@ static int __init gic_init_bases(void __iomem *dist_base,
gic_dist_init();
gic_cpu_init();
+ gic_enable_nmi_support();
gic_smp_init();
gic_cpu_pm_init();
@@ -1719,8 +1730,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
gicv2m_init(handle, gic_data.domain);
}
- gic_enable_nmi_support();
-
return 0;
out_free:
--
2.25.1
Hi Marc, Sumit
I would appreciate if you have any advice on this patch.
Yuichi Ito
> -----Original Message-----
> From: Yuichi Ito <[email protected]>
> Sent: Thursday, September 24, 2020 1:43 PM
> To: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Ito,
> Yuichi/伊藤 有一 <[email protected]>
> Subject: [PATCH 0/2] Enable support IPI_CPU_CRASH_STOP to be
> pseudo-NMI
>
> Enable support IPI_CPU_CRASH_STOP to be pseudo-NMI
>
> This patchset enables IPI_CPU_CRASH_STOP IPI to be pseudo-NMI.
> This allows kdump to collect system information even when the CPU is in a
> HARDLOCKUP state.
>
> Only IPI_CPU_CRASH_STOP uses NMI and the other IPIs remain normal
> IRQs.
>
> The patch has been tested on ThunderX.
>
> This patch assumes Marc's latest IPIs patch-set. [1] It also uses some of
> Sumit's IPI patch set for NMI.[2]
>
> [1]
> https://lore.kernel.org/linux-arm-kernel/[email protected]/
> [2]
> https://lore.kernel.org/linux-arm-kernel/1599830924-13990-3-git-send-email
> [email protected]/
>
> $ echo 1 > /proc/sys/kernel/panic_on_rcu_stal
> $ echo HARDLOCKUP > /sys/kernel/debug/provoke-crash/DIRECT
> : kernel panics and crash kernel boot
> : makedumpfile saves the system state at HARDLOCKUP in vmcore.
>
> crash utility:
> crash> bt
> PID: 3213 TASK: fffffd001adc5940 CPU: 8 COMMAND: "bash"
> #0 [fffffe0022fefcf0] lkdtm_HARDLOCKUP at fffffe0010888ab4
> #1 [fffffe0022fefd10] lkdtm_do_action at fffffe00108882bc
> #2 [fffffe0022fefd20] direct_entry at fffffe0010888720
> #3 [fffffe0022fefd70] full_proxy_write at fffffe001058cfe4
> #4 [fffffe0022fefdb0] vfs_write at fffffe00104a4c2c
> #5 [fffffe0022fefdf0] ksys_write at fffffe00104a4f0c
> #6 [fffffe0022fefe40] __arm64_sys_write at fffffe00104a4fbc
> #7 [fffffe0022fefe50] el0_svc_common.constprop.0 at fffffe0010159e38
> #8 [fffffe0022fefe80] do_el0_svc at fffffe0010159fa0
> #9 [fffffe0022fefe90] el0_svc at fffffe00101481d0
> #10 [fffffe0022fefea0] el0_sync_handler at fffffe00101484b4
> #11 [fffffe0022fefff0] el0_sync at fffffe0010142b7c
>
>
> Sumit Garg (1):
> irqchip/gic-v3: Enable support for SGIs to act as NMIs
>
> Yuichi Ito (1):
> Register IPI_CPU_CRASH_STOP IPI as pseudo-NMI
>
> arch/arm64/kernel/smp.c | 39
> ++++++++++++++++++++++++++++--------
> drivers/irqchip/irq-gic-v3.c | 13 ++++++++++--
> 2 files changed, 42 insertions(+), 10 deletions(-)
>
> --
> 2.25.1
On 2020-09-28 03:43, [email protected] wrote:
> Hi Marc, Sumit
>
> I would appreciate if you have any advice on this patch.
I haven't had a chance to look into it, as I'm not even sure I'll
take the core series in the first place (there are outstanding
regressions I can't reproduce, let alone fix them).
>
> Yuichi Ito
>
>> -----Original Message-----
>> From: Yuichi Ito <[email protected]>
>> Sent: Thursday, September 24, 2020 1:43 PM
>> To: [email protected]; [email protected]; [email protected];
>> [email protected]; [email protected]; [email protected]
>> Cc: [email protected];
>> [email protected]; Ito,
>> Yuichi/伊藤 有一 <[email protected]>
>> Subject: [PATCH 0/2] Enable support IPI_CPU_CRASH_STOP to be
>> pseudo-NMI
>>
>> Enable support IPI_CPU_CRASH_STOP to be pseudo-NMI
>>
>> This patchset enables IPI_CPU_CRASH_STOP IPI to be pseudo-NMI.
>> This allows kdump to collect system information even when the CPU is
>> in a
>> HARDLOCKUP state.
>>
>> Only IPI_CPU_CRASH_STOP uses NMI and the other IPIs remain normal
>> IRQs.
>>
>> The patch has been tested on ThunderX.
Which ThunderX? TX2 (at least the incarnation I used in the past) wasn't
able to correctly deal with priorities.
M.
--
Jazz is not dead. It just smells funny...
Hi Marc
Thank you for your reply.
> On 2020-09-28 03:43, [email protected] wrote:
> > Hi Marc, Sumit
> >
> > I would appreciate if you have any advice on this patch.
>
> I haven't had a chance to look into it, as I'm not even sure I'll take the core
> series in the first place (there are outstanding regressions I can't reproduce,
> let alone fix them).
>
I understand it.
Please let me know if there is anything I can do.
I sincerely hope that your patches will be merged into the mainline.
> >
> > Yuichi Ito
> >
> >> Enable support IPI_CPU_CRASH_STOP to be pseudo-NMI
> >>
> >> This patchset enables IPI_CPU_CRASH_STOP IPI to be pseudo-NMI.
> >> This allows kdump to collect system information even when the CPU is
> >> in a HARDLOCKUP state.
> >>
> >> Only IPI_CPU_CRASH_STOP uses NMI and the other IPIs remain normal
> >> IRQs.
> >>
> >> The patch has been tested on ThunderX.
>
> Which ThunderX? TX2 (at least the incarnation I used in the past) wasn't able
> to correctly deal with priorities.
I tried it with ThunderX CN8890.
If you tell me the steps to reproduce the problem on TX2, I will investigate it on TX as well.
> M.
> --
> Jazz is not dead. It just smells funny...
Thank you and best regards,
Yuichi Ito
On 2020-09-29 06:50, [email protected] wrote:
> Hi Marc
[...]
>> >> The patch has been tested on ThunderX.
>>
>> Which ThunderX? TX2 (at least the incarnation I used in the past)
>> wasn't able
>> to correctly deal with priorities.
>
> I tried it with ThunderX CN8890.
> If you tell me steps to reproduce the problem of TX2, I will
> investigate it with TX as well.
PMR_EL1 reporting fantasy values, non-uniform priority support across
the interrupt classes, and generally prone to lockups. The original TX
is a very different machine though (TX 1 and 2 only share the engraving
of the manufacturer on the heat-spreader).
M.
--
Jazz is not dead. It just smells funny...
Hi Marc
>
> On 2020-09-29 06:50, [email protected] wrote:
> > Hi Marc
>
> [...]
>
> >> >> The patch has been tested on ThunderX.
> >>
> >> Which ThunderX? TX2 (at least the incarnation I used in the past)
> >> wasn't able
> >> to correctly deal with priorities.
> >
> > I tried it with ThunderX CN8890.
> > If you tell me steps to reproduce the problem of TX2, I will
> > investigate it with TX as well.
>
> PMR_EL1 reporting fantasy values, non-uniform priority support across
> the interrupt classes, and generally prone to lockups. The original TX
> is a very different machine though (TX 1 and 2 only share the engraving
> of the manufacturer on the heat-spreader).
Thank you for the information.
I will check if we have a ThunderX1 or X2 environment. If we have either one, I will investigate it.
> M.
> --
> Jazz is not dead. It just smells funny...
Thank you and best regards,
Yuichi Ito