2018-08-13 14:50:36

by Andrew Banman

[permalink] [raw]
Subject: [PATCH] x86/platform/uv/BAU: gracefully disable BAU during panic

When we panic while the BAU is active, outstanding broadcasts may go
un-acknowledged by the kernel. These broadcasts cause timeouts in the
UV ASIC that tie up BAU resources, which may cause a fatal error that
brings down the system, thereby crashing the kdump kexec.

Add uv_bau_crash_shutdown() to bring BAU to quiescence ahead of the crash
shutdown routine saved in smp machine_ops. Assign uv_bau_crash_shutdown
to machine_ops during init.

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Colin Ian King <[email protected]>
Cc: Dimitri Sivanich <[email protected]>
Cc: [email protected]
Cc: [email protected]
Acked-by: Mike Travis <[email protected]>
Signed-off-by: Andrew Banman <[email protected]>
---
arch/x86/platform/uv/tlb_uv.c | 49 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index e26dfad..dae6b3c 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -22,8 +22,13 @@
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
+#include <asm/reboot.h>

static struct bau_operations ops __ro_after_init;
+#ifdef CONFIG_KEXEC_CORE
+static void (*crash_shutdown)(struct pt_regs *regs) __ro_after_init;
+static bool crash_in_progress;
+#endif

/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static const int timeout_base_ns[] = {
@@ -2195,6 +2200,44 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
.wait_completion = uv4_wait_completion,
};

+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Bring BAU to quiescence by abandoning current broadcasts and freeing up
+ * resources needed by firmware-initiated BAU broadcasts.
+ */
+void uv_bau_crash_shutdown(struct pt_regs *regs)
+{
+ int pnode = 0;
+ int uvhub = 0;
+
+ if (crash_in_progress)
+ return; /* shutdown already started once; do not repeat it */
+ crash_in_progress = true;
+
+ /*
+ * Don't bother turning off BAU in the per-cpu structs. We free up
+ * enough INTD resources to accommodate any new broadcasts that
+ * may (however unlikely) start up before we complete the panic,
+ * without tying up FW-initiated General Broadcasts that must not
+ * time out.
+ */
+
+ for_each_possible_blade(uvhub) {
+ if (!uv_blade_nr_possible_cpus(uvhub))
+ continue; /* blade has no possible cpus: nothing to clear */
+ pnode = uv_blade_to_pnode(uvhub);
+ /* Set STATUS registers to idle to free source cpus */
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0);
+ /* Clear TIMEOUT and PENDING bits to free up BAU resources */
+ ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF);
+ }
+
+ crash_shutdown(regs); /* chain to the routine saved by uv_bau_init() */
+}
+#endif
+
/*
* Initialization of BAU-related structures
*/
@@ -2267,6 +2310,12 @@ static int __init uv_bau_init(void)
}
}

+#ifdef CONFIG_KEXEC_CORE
+ crash_shutdown = machine_ops.crash_shutdown;
+ machine_ops.crash_shutdown = uv_bau_crash_shutdown;
+ crash_in_progress = false;
+#endif
+
return 0;

err_bau_disable:
--
1.8.2.1



Subject: [tip:x86/urgent] x86/platform/uv/BAU: Gracefully disable BAU during panic

Commit-ID: 99f3965878759d36baac944df004b4dafcc272b4
Gitweb: https://git.kernel.org/tip/99f3965878759d36baac944df004b4dafcc272b4
Author: Andrew Banman <[email protected]>
AuthorDate: Mon, 13 Aug 2018 08:54:37 -0500
Committer: Thomas Gleixner <[email protected]>
CommitDate: Mon, 20 Aug 2018 18:04:43 +0200

x86/platform/uv/BAU: Gracefully disable BAU during panic

When a panic happens while the BAU is active, outstanding broadcasts may go
un-acknowledged by the kernel. These broadcasts cause timeouts in the UV
ASIC that tie up BAU resources, which may cause a fatal error that brings
down the system, thereby crashing the kdump kexec.

Add uv_bau_crash_shutdown() to bring BAU to quiescence ahead of the crash
shutdown routine saved in smp machine_ops. Assign uv_bau_crash_shutdown
to machine_ops during init.

Signed-off-by: Andrew Banman <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Acked-by: Mike Travis <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Colin Ian King <[email protected]>
Cc: Dimitri Sivanich <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/platform/uv/tlb_uv.c | 49 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a4130b84d1ff..4c1e1197b82d 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -22,8 +22,13 @@
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
+#include <asm/reboot.h>

static struct bau_operations ops __ro_after_init;
+#ifdef CONFIG_KEXEC_CORE
+static void (*crash_shutdown)(struct pt_regs *regs) __ro_after_init;
+static bool crash_in_progress;
+#endif

/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static const int timeout_base_ns[] = {
@@ -2196,6 +2201,44 @@ static const struct bau_operations uv4_bau_ops __initconst = {
.wait_completion = uv4_wait_completion,
};

+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Bring BAU to quiescence by abandoning current broadcasts and freeing up
+ * resources needed by firmware-initiated BAU broadcasts.
+ */
+void uv_bau_crash_shutdown(struct pt_regs *regs)
+{
+ int pnode = 0;
+ int uvhub = 0;
+
+ if (crash_in_progress)
+ return; /* shutdown already started once; do not repeat it */
+ crash_in_progress = true;
+
+ /*
+ * Don't bother turning off BAU in the per-cpu structs. We free up
+ * enough INTD resources to accommodate any new broadcasts that
+ * may (however unlikely) start up before we complete the panic,
+ * without tying up FW-initiated General Broadcasts that must not
+ * time out.
+ */
+
+ for_each_possible_blade(uvhub) {
+ if (!uv_blade_nr_possible_cpus(uvhub))
+ continue; /* blade has no possible cpus: nothing to clear */
+ pnode = uv_blade_to_pnode(uvhub);
+ /* Set STATUS registers to idle to free source cpus */
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0);
+ write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0);
+ /* Clear TIMEOUT and PENDING bits to free up BAU resources */
+ ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF);
+ }
+
+ crash_shutdown(regs); /* chain to the routine saved by uv_bau_init() */
+}
+#endif
+
/*
* Initialization of BAU-related structures
*/
@@ -2268,6 +2311,12 @@ static int __init uv_bau_init(void)
}
}

+#ifdef CONFIG_KEXEC_CORE
+ crash_shutdown = machine_ops.crash_shutdown;
+ machine_ops.crash_shutdown = uv_bau_crash_shutdown;
+ crash_in_progress = false;
+#endif
+
return 0;

err_bau_disable: