Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751701AbdIEIyD (ORCPT ); Tue, 5 Sep 2017 04:54:03 -0400 Received: from bombadil.infradead.org ([65.50.211.133]:39039 "EHLO bombadil.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751379AbdIEIx5 (ORCPT ); Tue, 5 Sep 2017 04:53:57 -0400 Date: Tue, 5 Sep 2017 10:53:50 +0200 From: Peter Zijlstra To: Markus Trippelsdorf Cc: linux-kernel@vger.kernel.org, Thomas Gleixner , Ingo Molnar Subject: Re: Current mainline git (24e700e291d52bd2) hangs when building e.g. perf Message-ID: <20170905085350.cgi7shvnillbikow@hirez.programming.kicks-ass.net> References: <20170905072738.GA277@x4> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20170905072738.GA277@x4> User-Agent: NeoMutt/20170609 (1.8.3) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4834 Lines: 196 On Tue, Sep 05, 2017 at 09:27:38AM +0200, Markus Trippelsdorf wrote: > Current mainline git (24e700e291d52bd2) hangs when building software > concurrently (for example perf). > The issue is not 100% reproducible (sometimes building perf succeeds), > so bisecting will not work. Sadly I cannot reproduce, I had: while :; do make clean; make; done running on tools/perf for a while, and now have: while :; do make O=defconfig-build clean; make O=defconfig-build -j80; done running, all smooth sailing, although there's the hope that the moment I hit send on this email the box comes unstuck. > Magic SysRq key doesn't work and there is nothing in the logs. > Enabling CONFIG_PROVE_LOCKING makes the issue go away. SysRq not working is suspicious.. and I take it the NMI watchdog also isn't firing? > Any ideas on how to debug this further? So you have a (real) serial line on that box? Could you try something like: debug ignore_loglevel sysrq_always_enabled earlyprintk=serial,ttyS0,115200 force_early_printk with the below patch applied? That always gives me the most reliable output. --- kernel/printk/printk.c | 119 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 33 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fc47863f629c..b17099fbc7ce 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -365,6 +365,75 @@ __packed __aligned(4) #endif ; +#ifdef CONFIG_EARLY_PRINTK +struct console *early_console; + +static bool __read_mostly force_early_printk; + +static int __init force_early_printk_setup(char *str) +{ + force_early_printk = true; + return 0; +} +early_param("force_early_printk", force_early_printk_setup); + +static int early_printk_cpu = -1; + +static int early_vprintk(const char *fmt, va_list args) +{ + int n, cpu, old; + char buf[512]; + + cpu = get_cpu(); + /* + * Test-and-Set inter-cpu spinlock with recursion. + */ + for (;;) { + /* + * c-cas to avoid the exclusive bouncing on spin. + * Depends on the memory barrier implied by cmpxchg + * for ACQUIRE semantics. + */ + old = READ_ONCE(early_printk_cpu); + if (old == -1) { + old = cmpxchg(&early_printk_cpu, -1, cpu); + if (old == -1) + break; + } + /* + * Allow recursion for interrupts and the like. + */ + if (old == cpu) + break; + + cpu_relax(); + } + + n = vscnprintf(buf, sizeof(buf), fmt, args); + early_console->write(early_console, buf, n); + + /* + * Unlock -- in case @old == @cpu, this is a no-op. + */ + smp_store_release(&early_printk_cpu, old); + put_cpu(); + + return n; +} + +asmlinkage __visible void early_printk(const char *fmt, ...) +{ + va_list ap; + + if (!early_console) + return; + + va_start(ap, fmt); + early_vprintk(fmt, ap); + va_end(ap); +} +#endif + /* * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken * within the scheduler's rq lock. It must be released before calling @@ -1704,6 +1773,16 @@ asmlinkage int vprintk_emit(int facility, int level, int printed_len = 0; bool in_sched = false; +#ifdef CONFIG_KGDB_KDB + if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) + return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); +#endif + +#ifdef CONFIG_EARLY_PRINTK + if (force_early_printk && early_console) + return early_vprintk(fmt, args); +#endif + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; @@ -1796,18 +1875,7 @@ EXPORT_SYMBOL(printk_emit); int vprintk_default(const char *fmt, va_list args) { - int r; - -#ifdef CONFIG_KGDB_KDB - /* Allow to pass printk() to kdb but avoid a recursion. */ - if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) { - r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); - return r; - } -#endif - r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); - - return r; + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); } EXPORT_SYMBOL_GPL(vprintk_default); @@ -1838,7 +1906,12 @@ asmlinkage __visible int printk(const char *fmt, ...) int r; va_start(args, fmt); - r = vprintk_func(fmt, args); +#ifdef CONFIG_EARLY_PRINTK + if (force_early_printk && early_console) + r = vprintk_default(fmt, args); + else +#endif + r = vprintk_func(fmt, args); va_end(args); return r; @@ -1875,26 +1948,6 @@ static bool suppress_message_printing(int level) { return false; } #endif /* CONFIG_PRINTK */ -#ifdef CONFIG_EARLY_PRINTK -struct console *early_console; - -asmlinkage __visible void early_printk(const char *fmt, ...) -{ - va_list ap; - char buf[512]; - int n; - - if (!early_console) - return; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - - early_console->write(early_console, buf, n); -} -#endif - static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) {