When a panic happens while the BAU is active, there is a chance that
outstanding broadcasts tie up BAU resources long enough to cause timeouts
in the UV ASIC. These timeouts are hardware errors that immediately bring
down the system, preventing kdump from completing.
Add uv_bau_crash_shutdown() to bring BAU to quiescence during panic before
continuing with the native crash shutdown. Assign uv_bau_crash_shutdown
to machine_ops during init.
Signed-off-by: Andrew Banman <[email protected]>
---
arch/x86/platform/uv/tlb_uv.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index b36caae..e7f9aea 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -22,6 +22,7 @@
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
+#include <asm/reboot.h>
static struct bau_operations ops __ro_after_init;
@@ -2197,6 +2198,32 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
.wait_completion = uv4_wait_completion,
};
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Bring BAU to quiescence during panic by disabling future broadcasts and
+ * abandoning current broadcasts, then continue with the native crash
+ * shutdown. Outstanding broadcasts could otherwise tie up BAU resources
+ * long enough to cause timeouts in the UV ASIC, which bring the system
+ * down before kdump can complete.
+ *
+ * @regs: passed through unchanged to native_machine_crash_shutdown().
+ */
+void uv_bau_crash_shutdown(struct pt_regs *regs)
+{
+	int uvhub;
+	int pnode;
+
+	set_bau_off();
+	nobau_perm = 1;
+
+	for_each_possible_blade(uvhub) {
+		/* Skip blades that have no possible cpus */
+		if (!uv_blade_nr_possible_cpus(uvhub))
+			continue;
+
+		pnode = uv_blade_to_pnode(uvhub);
+
+		/* Set STATUS registers to idle to free source cpus */
+		write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0);
+		write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0);
+		write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0);
+
+		/* Clear TIMEOUT and PENDING bits to free up BAU resources */
+		ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF);
+	}
+
+	native_machine_crash_shutdown(regs);
+}
+#endif
+
/*
* Initialization of BAU-related structures
*/
@@ -2269,6 +2296,10 @@ static int __init uv_bau_init(void)
}
}
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.crash_shutdown = uv_bau_crash_shutdown;
+#endif
+
return 0;
err_bau_disable:
--
1.8.2.1
Hi Andrew,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on tip/auto-latest]
[also build test ERROR on v4.17-rc7 next-20180601]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Andrew-Banman/x86-platform-uv-BAU-gracefully-disable-BAU-during-panic/20180604-005410
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All error/warnings (new ones prefixed by >>):
In file included from arch/x86/platform/uv/tlb_uv.c:19:0:
arch/x86/platform/uv/tlb_uv.c: In function 'uv_bau_crash_shutdown':
>> arch/x86/platform/uv/tlb_uv.c:2211:26: error: 'uvhub' undeclared (first use in this function)
for_each_possible_blade(uvhub) {
^
arch/x86/include/asm/uv/uv_hub.h:437:8: note: in definition of macro 'for_each_possible_blade'
for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++)
^~~
arch/x86/platform/uv/tlb_uv.c:2211:26: note: each undeclared identifier is reported only once for each function it appears in
for_each_possible_blade(uvhub) {
^
arch/x86/include/asm/uv/uv_hub.h:437:8: note: in definition of macro 'for_each_possible_blade'
for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++)
^~~
>> arch/x86/platform/uv/tlb_uv.c:2214:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
int pnode = uv_blade_to_pnode(uvhub);
^~~
vim +/uvhub +2211 arch/x86/platform/uv/tlb_uv.c
2200
2201 #ifdef CONFIG_KEXEC_CORE
2202 /*
2203 * Bring BAU to quiesence by disabling future broadcasts and abandoning
2204 * current broadcasts during panic.
2205 */
2206 void uv_bau_crash_shutdown(struct pt_regs *regs)
2207 {
2208 set_bau_off();
2209 nobau_perm = 1;
2210
> 2211 for_each_possible_blade(uvhub) {
2212 if (!uv_blade_nr_possible_cpus(uvhub))
2213 continue;
> 2214 int pnode = uv_blade_to_pnode(uvhub);
2215 /* Set STATUS registers to idle to free source cpus */
2216 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_0, 0x0);
2217 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_1, 0x0);
2218 write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_STATUS_2, 0x0);
2219 /* Clear TIMEOUT and PENDING bits to free up BAU resources */
2220 ops.write_g_sw_ack(pnode, ops.read_g_sw_ack(pnode) & 0xFFFF);
2221 }
2222
2223 native_machine_crash_shutdown(regs);
2224 }
2225 #endif
2226
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Sun, Jun 03, 2018 at 11:42:32AM -0500, Andrew Banman wrote:
> When panic happens while BAU is active there is a chance that
> outstanding broadcasts tie up BAU resources enough to cause timeouts in
> the UV ASIC. These timeouts are hardware errors that immediately bring
> down the system, preventing kdump from completing.
>
> Add uv_bau_crash_shutdown() to bring BAU to quiescence during panic before
> continuing with the native crash shutdown. Assign uv_bau_crash_shutdown
> to machine_ops during init.
>
> Signed-off-by: Andrew Banman <[email protected]>
> ---
> arch/x86/platform/uv/tlb_uv.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
<formletter>
This is not the correct way to submit patches for inclusion in the
stable kernel tree. Please read:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.
</formletter>