Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933545AbbFJMzW (ORCPT ); Wed, 10 Jun 2015 08:55:22 -0400 Received: from casper.infradead.org ([85.118.1.10]:59068 "EHLO casper.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754011AbbFJMzP (ORCPT ); Wed, 10 Jun 2015 08:55:15 -0400 Date: Wed, 10 Jun 2015 14:55:09 +0200 From: Peter Zijlstra To: Steven Rostedt Cc: linux-kernel@vger.kernel.org, jkosina@suse.cz, paulmck@linux.vnet.ibm.com, pmladek@suse.cz, Ingo Molnar , Thomas Gleixner Subject: [RFC][PATCH] printk: Fixup the nmi printk mess Message-ID: <20150610125509.GO19282@twins.programming.kicks-ass.net> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2012-12-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 15911 Lines: 543 Hi, I just stumbled upon: a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs") Which is not dissimilar to what I've proposed in the past. Except its squirreled away in some far and dark corner of one arch. Lets fix that. --- arch/Kconfig | 4 ++ arch/arm/Kconfig | 1 + arch/avr32/Kconfig | 1 + arch/blackfin/Kconfig | 1 + arch/cris/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/tile/Kconfig | 1 + arch/x86/Kconfig | 1 + arch/x86/kernel/apic/hw_nmi.c | 86 +---------------------------- include/linux/hardirq.h | 2 + include/linux/percpu.h | 3 - include/linux/printk.h | 10 +++- init/Kconfig | 5 ++ init/main.c | 1 + kernel/printk/printk.c | 124 ++++++++++++++++++++++++++++++++++++++---- 18 files changed, 145 insertions(+), 100 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a65eafb24997..ad8db3979e21 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -179,7 +179,11 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_NMI + bool + config HAVE_NMI_WATCHDOG + depends on HAVE_NMI bool # # An arch should select this if it provides all these things: diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..a26e83699df9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -61,6 +61,7 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND + select HAVE_NMI if (!CPU_V7) select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index b6878eb64884..9dc3e2b1180b 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -17,6 +17,7 @@ config AVR32 select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select HAVE_NMI help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index af76634f8d98..47c0a55acafd 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -40,6 +40,7 @@ config BLACKFIN select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select HAVE_NMI config GENERIC_CSUM def_bool y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 0314e325a669..2e053f709c4b 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -58,6 +58,7 @@ config CRIS select CLKSRC_MMIO if ETRAX_ARCH_V32 select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32 + select HAVE_NMI config HZ int diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 190cc48abc0c..079f4fec9586 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -153,6 +153,7 @@ config PPC select NO_BOOTMEM select HAVE_GENERIC_RCU_GUP select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_NMI if PERF_EVENTS config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b06dc3839268..1b84036437c0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -152,6 +152,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select HAVE_NMI config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 50057fed819d..7803b6863c0e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -44,6 +44,7 @@ config SUPERH select OLD_SIGSUSPEND select OLD_SIGACTION select HAVE_ARCH_AUDITSYSCALL + select HAVE_NMI help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e49502acbab4..198abcb6e510 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -80,6 +80,7 @@ config SPARC64 select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select HAVE_NMI config ARCH_DEFCONFIG string diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index a07e31b50d3f..1503ab5882cd 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -28,6 +28,7 @@ config TILE select HAVE_DEBUG_STACKOVERFLOW select ARCH_WANT_FRAME_POINTERS select HAVE_CONTEXT_TRACKING + select HAVE_NMI if USE_PMC # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 228aa35d7e89..67dbc7442499 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -118,6 +118,7 @@ config X86 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_NMI select HAVE_OPROFILE select HAVE_OPTPROBES select HAVE_PCSPKR_PLATFORM diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 6873ab925d00..90eb8c385e60 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh) @@ -30,35 +29,14 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) -{ - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); -} - void arch_trigger_all_cpu_backtrace(bool include_self) { - struct nmi_seq_buf *s; - int len; - int cpu; - int i; int this_cpu = get_cpu(); + int i; if (test_and_set_bit(0, &backtrace_flag)) { /* @@ -73,16 +51,6 @@ void arch_trigger_all_cpu_backtrace(bool include_self) if (!include_self) cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("sending NMI to %s CPUs:\n", (include_self ? "all" : "other")); @@ -97,58 +65,11 @@ void arch_trigger_all_cpu_backtrace(bool include_self) touch_softlockup_watchdog(); } - /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. - */ - for_each_cpu(cpu, &printtrace_mask) { - int last_i = 0; - - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - clear_bit(0, &backtrace_flag); smp_mb__after_atomic(); put_cpu(); } -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; -} - static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { @@ -157,13 +78,8 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); - this_cpu_write(printk_func, printk_func_save); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return NMI_HANDLED; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index dfd59d6bc6f0..a477e0766d2e 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -67,10 +67,12 @@ extern void irq_exit(void); preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ + printk_nmi_enter(); \ } while (0) #define nmi_exit() \ do { \ + printk_nmi_exit(); \ trace_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index caebf2a758dc..04c68b9f56f8 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -135,7 +135,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -/* To avoid include hell, as printk can not declare this, we declare it here */ -DECLARE_PER_CPU(printk_func_t, printk_func); - #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9b30871c9149..ebe017327e6e 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -120,7 +120,15 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif -typedef int(*printk_func_t)(const char *fmt, va_list args); +#ifdef CONFIG_PRINTK_NMI +extern void printk_init(void); +extern void printk_nmi_enter(void); +extern void printk_nmi_exit(void); +#else +static inline void printk_init(void) { } +static inline void printk_nmi_enter(void) { } +static inline void printk_nmi_exit(void) { } +#endif /* HAVE_NMI */ #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) diff --git a/init/Kconfig b/init/Kconfig index b999fa381bf9..fe9a439c8843 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1431,6 +1431,11 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. +config PRINTK_NMI + def_bool y + depends on PRINTK + depends on HAVE_NMI + config BUG bool "BUG() support" if EXPERT default y diff --git a/init/main.c b/init/main.c index 2115055faeac..4a28accaaa98 100644 --- a/init/main.c +++ b/init/main.c @@ -586,6 +586,7 @@ asmlinkage __visible void __init start_kernel(void) timekeeping_init(); time_init(); sched_clock_postinit(); + printk_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c099b082cd02..0149c41be95b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1821,13 +1821,125 @@ int vprintk_default(const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(vprintk_default); +#ifdef CONFIG_PRINTK_NMI + +typedef int(*printk_func_t)(const char *fmt, va_list args); /* * This allows printk to be diverted to another function per cpu. * This is useful for calling printk functions from within NMI * without worrying about race conditions that can lock up the * box. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; +static DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; + +#include + +struct nmi_seq_buf { + struct seq_buf seq; + struct irq_work work; + unsigned char buffer[PAGE_SIZE - + sizeof(struct seq_buf) - + sizeof(struct irq_work)]; +}; + +/* Safe printing in NMI context */ +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + +static void __printk_nmi_flush(struct irq_work *work) +{ + struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); + int len, last_i = 0, i = 0; + +again: + len = seq_buf_used(&s->seq); + if (!len) + return; + + /* Print line by line. */ + for (; i < len; i++) { + if (s->buffer[i] == '\n') { + print_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < len) { + print_seq_line(s, last_i, len - 1); + pr_cont("\n"); + } + + /* + * Another NMI could have come in while we were printing + * check if nothing has been added to the buffer. + */ + if (cmpxchg_local(&s->seq.len, len, 0) != len) + goto again; +} + +void printk_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + + init_irq_work(&s->work, __printk_nmi_flush); + seq_buf_init(&s->seq, s->buffer, sizeof(s->buffer)); + } +} + +/* + * It is not safe to call printk() directly from NMI handlers. + * It may be fine if the NMI detected a lock up and we have no choice + * but to do so, but doing a NMI on all other CPUs to get a back trace + * can be done with a sysrq-l. We don't want that to lock up, which + * can happen if the NMI interrupts a printk in progress. + * + * Instead, we redirect the vprintk() to this nmi_vprintk() that writes + * the content into a per cpu seq_buf buffer. Then when the NMIs are + * all done, we can safely dump the contents of the seq_buf to a printk() + * from a non NMI context. + */ +static int vprintk_nmi(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + unsigned int len = seq_buf_used(&s->seq); + + irq_work_queue(&s->work); + seq_buf_vprintf(&s->seq, fmt, args); + return seq_buf_used(&s->seq) - len; +} + +void printk_nmi_enter(void) +{ + this_cpu_write(printk_func, vprintk_nmi); +} + +void printk_nmi_exit(void) +{ + this_cpu_write(printk_func, vprintk_default); +} + +static inline int vprintk_func(const char *fmt, va_list args) +{ + return this_cpu_read(printk_func)(fmt, args); +} + +#else + +static inline int vprintk_func(const char *fmt, va_list args) +{ + return vprintk_default(fmt, args); +} + +#endif /* PRINTK_NMI */ /** * printk - print a kernel message @@ -1852,21 +1964,11 @@ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; */ asmlinkage __visible int printk(const char *fmt, ...) { - printk_func_t vprintk_func; va_list args; int r; va_start(args, fmt); - - /* - * If a caller overrides the per_cpu printk_func, then it needs - * to disable preemption when calling printk(). Otherwise - * the printk_func should be set to the default. No need to - * disable preemption here. - */ - vprintk_func = this_cpu_read(printk_func); r = vprintk_func(fmt, args); - va_end(args); return r; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/