2013-09-23 21:26:56

by Mike Travis

[permalink] [raw]
Subject: [PATCH 4/7] x86/UV: Add kdump to UV NMI handler

If a system has hung and it no longer responds to external events, this
patch adds the capability of doing a standard kdump and system reboot
then triggered by the system NMI command.

It is enabled when the nmi action is changed to "kdump" and the
kernel is built with CONFIG_KEXEC enabled.

Signed-off-by: Mike Travis <[email protected]>
Reviewed-by: Dimitri Sivanich <[email protected]>
Reviewed-by: Hedi Berriche <[email protected]>
---
arch/x86/platform/uv/uv_nmi.c | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)

--- linux.orig/arch/x86/platform/uv/uv_nmi.c
+++ linux/arch/x86/platform/uv/uv_nmi.c
@@ -21,6 +21,7 @@

#include <linux/cpu.h>
#include <linux/delay.h>
+#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sched.h>
@@ -70,6 +71,7 @@ static atomic_t uv_in_nmi;
static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
static atomic_t uv_nmi_slave_continue;
+static atomic_t uv_nmi_kexec_failed;
static cpumask_var_t uv_nmi_cpu_mask;

/* Values for uv_nmi_slave_continue */
@@ -143,6 +145,7 @@ module_param_named(retry_count, uv_nmi_r
* Valid NMI Actions:
* "dump" - dump process stack for each cpu
* "ips" - dump IP info for each cpu
+ * "kdump" - do crash dump
*/
static char uv_nmi_action[8] = "dump";
module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644);
@@ -496,6 +499,40 @@ static void uv_nmi_touch_watchdogs(void)
touch_nmi_watchdog();
}

+#if defined(CONFIG_KEXEC)
+static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ /* Call crash to dump system state */
+ if (master) {
+ pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
+ crash_kexec(regs);
+
+ pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ if (!kexec_crash_image) {
+ pr_cont("crash kernel not loaded\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
+ uv_nmi_sync_exit(1);
+ return;
+ }
+ pr_cont("kexec busy, stalling cpus while waiting\n");
+ }
+
+ /* If crash exec fails the slaves should return, otherwise stall */
+ while (atomic_read(&uv_nmi_kexec_failed) == 0)
+ mdelay(10);
+
+ /* Crash kernel most likely not loaded, return in an orderly fashion */
+ uv_nmi_sync_exit(0);
+}
+
+#else /* !CONFIG_KEXEC */
+static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ if (master)
+ pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+}
+#endif /* !CONFIG_KEXEC */
+
/*
* UV NMI handler
*/
@@ -517,6 +554,10 @@ int uv_handle_nmi(unsigned int reason, s
/* Indicate we are the first CPU into the NMI handler */
master = (atomic_read(&uv_nmi_cpu) == cpu);

+ /* If NMI action is "kdump", then attempt to do it */
+ if (uv_nmi_action_is("kdump"))
+ uv_nmi_kdump(cpu, master, regs);
+
/* Pause as all cpus enter the NMI handler */
uv_nmi_wait(master);


--


Subject: [tip:x86/uv] x86/UV: Add kdump to UV NMI handler

Commit-ID: 12ba6c990fab50fe568f3ad8715e81e356552428
Gitweb: http://git.kernel.org/tip/12ba6c990fab50fe568f3ad8715e81e356552428
Author: Mike Travis <[email protected]>
AuthorDate: Mon, 23 Sep 2013 16:25:03 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 24 Sep 2013 09:02:03 +0200

x86/UV: Add kdump to UV NMI handler

If a system has hung and it no longer responds to external
events, this patch adds the capability of doing a standard kdump
and system reboot then triggered by the system NMI command.

It is enabled when the nmi action is changed to "kdump" and the
kernel is built with CONFIG_KEXEC enabled.

Signed-off-by: Mike Travis <[email protected]>
Reviewed-by: Dimitri Sivanich <[email protected]>
Reviewed-by: Hedi Berriche <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jason Wessel <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/platform/uv/uv_nmi.c | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)

diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 4efcde1..2579fbd 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -21,6 +21,7 @@

#include <linux/cpu.h>
#include <linux/delay.h>
+#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sched.h>
@@ -70,6 +71,7 @@ static atomic_t uv_in_nmi;
static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
static atomic_t uv_nmi_slave_continue;
+static atomic_t uv_nmi_kexec_failed;
static cpumask_var_t uv_nmi_cpu_mask;

/* Values for uv_nmi_slave_continue */
@@ -143,6 +145,7 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
* Valid NMI Actions:
* "dump" - dump process stack for each cpu
* "ips" - dump IP info for each cpu
+ * "kdump" - do crash dump
*/
static char uv_nmi_action[8] = "dump";
module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644);
@@ -496,6 +499,40 @@ static void uv_nmi_touch_watchdogs(void)
touch_nmi_watchdog();
}

+#if defined(CONFIG_KEXEC)
+static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ /* Call crash to dump system state */
+ if (master) {
+ pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
+ crash_kexec(regs);
+
+ pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ if (!kexec_crash_image) {
+ pr_cont("crash kernel not loaded\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
+ uv_nmi_sync_exit(1);
+ return;
+ }
+ pr_cont("kexec busy, stalling cpus while waiting\n");
+ }
+
+ /* If crash exec fails the slaves should return, otherwise stall */
+ while (atomic_read(&uv_nmi_kexec_failed) == 0)
+ mdelay(10);
+
+ /* Crash kernel most likely not loaded, return in an orderly fashion */
+ uv_nmi_sync_exit(0);
+}
+
+#else /* !CONFIG_KEXEC */
+static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ if (master)
+ pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+}
+#endif /* !CONFIG_KEXEC */
+
/*
* UV NMI handler
*/
@@ -517,6 +554,10 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
/* Indicate we are the first CPU into the NMI handler */
master = (atomic_read(&uv_nmi_cpu) == cpu);

+ /* If NMI action is "kdump", then attempt to do it */
+ if (uv_nmi_action_is("kdump"))
+ uv_nmi_kdump(cpu, master, regs);
+
/* Pause as all cpus enter the NMI handler */
uv_nmi_wait(master);