Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933531AbbENLmu (ORCPT ); Thu, 14 May 2015 07:42:50 -0400 Received: from szxga01-in.huawei.com ([58.251.152.64]:27663 "EHLO szxga01-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932699AbbENLj3 (ORCPT ); Thu, 14 May 2015 07:39:29 -0400 From: Wang Long To: , , CC: , , , , , , , , , Subject: [RFC PATCH 16/17] x86/nmi: Perform a safe NMI stack trace on all CPUs Date: Thu, 14 May 2015 11:35:03 +0000 Message-ID: <1431603304-162571-17-git-send-email-long.wanglong@huawei.com> X-Mailer: git-send-email 1.8.3.4 In-Reply-To: <1431603304-162571-1-git-send-email-long.wanglong@huawei.com> References: <1431603304-162571-1-git-send-email-long.wanglong@huawei.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.107.197.200] X-CFilter-Loop: Reflected Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5871 Lines: 175 From: "Steven Rostedt (Red Hat)" commit a9edc88093287183ac934be44f295f183b2c62dd upstream. When trigger_all_cpu_backtrace() is called on x86, it will trigger an NMI on each CPU and call show_regs(). But this can lead to a hard lock up if the NMI comes in on another printk(). In order to avoid this, when the NMI triggers, it switches the printk routine for that CPU to call a NMI safe printk function that records the printk in a per_cpu seq_buf descriptor. After all NMIs have finished recording its data, the seq_bufs are printed in a safe context. Link: http://lkml.kernel.org/p/20140619213952.360076309@goodmis.org Link: http://lkml.kernel.org/r/20141115050605.055232587@goodmis.org Tested-by: Jiri Kosina Acked-by: Jiri Kosina Acked-by: Paul E. McKenney Reviewed-by: Petr Mladek [wanglong: backport to 3.10 stable - adjust context ] Signed-off-by: Wang Long Signed-off-by: Steven Rostedt --- arch/x86/kernel/apic/hw_nmi.c | 86 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index a698d71..1eb5f90 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh) @@ -29,12 +30,33 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +static cpumask_var_t printtrace_mask; + +#define NMI_BUF_SIZE 4096 + +struct nmi_seq_buf { + unsigned char buffer[NMI_BUF_SIZE]; + struct seq_buf seq; +}; + +/* Safe printing in NMI context */ +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; +static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + void arch_trigger_all_cpu_backtrace(void) { + struct nmi_seq_buf *s; + int len; + int cpu; int i; if (test_and_set_bit(0, &backtrace_flag)) @@ -45,6 +67,15 @@ void arch_trigger_all_cpu_backtrace(void) return; cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + cpumask_copy(printtrace_mask, to_cpumask(backtrace_mask)); + /* + * Set up per_cpu seq_buf buffers that the NMIs running on the other + * CPUs will write to. + */ + for_each_cpu(cpu, to_cpumask(backtrace_mask)) { + s = &per_cpu(nmi_print_seq, cpu); + seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); + } printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); @@ -56,10 +87,57 @@ void arch_trigger_all_cpu_backtrace(void) mdelay(1); } + /* + * Now that all the NMIs have triggered, we can dump out their + * back traces safely to the console. + */ + for_each_cpu(cpu, printtrace_mask) { + int last_i = 0; + + s = &per_cpu(nmi_print_seq, cpu); + len = seq_buf_used(&s->seq); + if (!len) + continue; + + /* Print line by line. */ + for (i = 0; i < len; i++) { + if (s->buffer[i] == '\n') { + print_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < len) { + print_seq_line(s, last_i, len - 1); + pr_cont("\n"); + } + } + clear_bit(0, &backtrace_flag); smp_mb__after_clear_bit(); } +/* + * It is not safe to call printk() directly from NMI handlers. + * It may be fine if the NMI detected a lock up and we have no choice + * but to do so, but doing a NMI on all other CPUs to get a back trace + * can be done with a sysrq-l. We don't want that to lock up, which + * can happen if the NMI interrupts a printk in progress. + * + * Instead, we redirect the vprintk() to this nmi_vprintk() that writes + * the content into a per cpu seq_buf buffer. Then when the NMIs are + * all done, we can safely dump the contents of the seq_buf to a printk() + * from a non NMI context. + */ +static int nmi_vprintk(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + unsigned int len = seq_buf_used(&s->seq); + + seq_buf_vprintf(&s->seq, fmt, args); + return seq_buf_used(&s->seq) - len; +} + static int __kprobes arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { @@ -68,12 +146,14 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + printk_func_t printk_func_save = this_cpu_read(printk_func); - arch_spin_lock(&lock); + /* Replace printk to write into the NMI seq */ + this_cpu_write(printk_func, nmi_vprintk); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); - arch_spin_unlock(&lock); + this_cpu_write(printk_func, printk_func_save); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return NMI_HANDLED; } -- 1.8.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/