2000-11-14 11:07:09

by Andrew Morton

[permalink] [raw]
Subject: [prepatch] removal of oops->printk deadlocks


This patch is designed to remove the things which can cause loss
of oops information due to the SMP deadlocks which can occur during
the oops processing itself. It's fairly x86-specific, but the
infrastructure is in place for other SMP-capable architectures
to use.

It also changes the x86 NMI oopser so that it no longer shuts the
console up after the first NMI oops. Instead, each CPU is allowed
to print out NMI diagnostics a single time per reboot.

This patch only addresses console-on-vgacon. If the console is
on a serial device then there may still be deadlocks. The serial
drivers can individually test `oops_in_progress' to avoid this.

The call to poke_blanked_console() is resurrected, but it's no
longer deadlocky. This is fairly important - without this change
a machine could be wedged with oops diagnostics on its screen,
but the screen would remain blanked.

The patch addresses oopses, die(), NMI oopses and BUG(). The changes
to BUG() result in 3 kbytes less .rodata.

There's a tiny kernel module at
http://www.uow.edu.au/~andrewm/linux/ddeadlock.c which may be
used to test the NMI oopser changes.

Comments are welcome!


--- linux-2.4.0-test11-pre4/include/linux/kernel.h Sun Oct 15 01:27:46 2000
+++ linux-akpm/include/linux/kernel.h Tue Nov 14 21:02:18 2000
@@ -62,6 +62,7 @@

asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+extern int oops_in_progress;

#if DEBUG
#define pr_debug(fmt,arg...) \
--- linux-2.4.0-test11-pre4/include/asm-i386/page.h Sat Nov 4 16:22:49 2000
+++ linux-akpm/include/asm-i386/page.h Tue Nov 14 21:02:18 2000
@@ -88,8 +88,9 @@
* Tell the user there is some problem. Beep too, so we can
* see^H^H^Hhear bugs in early bootup as well!
*/
+extern void do_BUG(const char *file, int line);
#define BUG() do { \
- printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ do_BUG(__FILE__, __LINE__); \
__asm__ __volatile__(".byte 0x0f,0x0b"); \
} while (0)

--- linux-2.4.0-test11-pre4/kernel/printk.c Sat Nov 4 16:22:49 2000
+++ linux-akpm/kernel/printk.c Tue Nov 14 21:02:18 2000
@@ -51,6 +51,7 @@
static unsigned long logged_chars;
struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
+int oops_in_progress;

/*
* Setup a list of consoles. Called from init/main.c
@@ -260,6 +261,8 @@
static signed char msg_level = -1;
long flags;

+ if (oops_in_progress)
+ spin_lock_init(&console_lock);
spin_lock_irqsave(&console_lock, flags);
va_start(args, fmt);
i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
@@ -308,7 +311,8 @@
msg_level = -1;
}
spin_unlock_irqrestore(&console_lock, flags);
- wake_up_interruptible(&log_wait);
+ if (!oops_in_progress)
+ wake_up_interruptible(&log_wait);
return i;
}

--- linux-2.4.0-test11-pre4/arch/i386/mm/fault.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/mm/fault.c Tue Nov 14 21:02:18 2000
@@ -77,17 +77,40 @@
return 0;
}

-extern spinlock_t console_lock, timerlist_lock;
+#ifdef CONFIG_SMP
+extern unsigned volatile int global_irq_lock;
+#endif

/*
* Unlock any spinlocks which will prevent us from getting the
- * message out (timerlist_lock is aquired through the
- * console unblank code)
+ * message out and tell the printk/console paths that an emergency
+ * message is coming through
+ */
+void bust_spinlocks(int yes)
+{
+ if (yes) {
+ oops_in_progress = 1;
+#ifdef CONFIG_SMP
+ global_irq_lock = 0; /* Many serial drivers do __global_cli() */
+#endif
+ } else {
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give syslogd
+ * a poke. Hold onto your hats...
+ */
+ printk("");
+ }
+}
+
+/*
+ * Called from the BUG() macro.
*/
-void bust_spinlocks(void)
+void do_BUG(const char *file, int line)
{
- spin_lock_init(&console_lock);
- spin_lock_init(&timerlist_lock);
+ bust_spinlocks(1);
+ printk("kernel BUG at %s:%d!\n", file, line);
}

asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
@@ -264,8 +287,7 @@
* terminate things with extreme prejudice.
*/

- bust_spinlocks();
-
+ bust_spinlocks(1);
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
else
@@ -283,6 +305,7 @@
printk(KERN_ALERT "*pte = %08lx\n", page);
}
die("Oops", regs, error_code);
+ bust_spinlocks(0);
do_exit(SIGKILL);

/*
--- linux-2.4.0-test11-pre4/arch/i386/kernel/traps.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/kernel/traps.c Tue Nov 14 21:11:10 2000
@@ -63,7 +63,7 @@
struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };

extern int console_loglevel;
-extern void bust_spinlocks(void);
+extern void bust_spinlocks(int yes);

static inline void console_silent(void)
{
@@ -209,9 +209,10 @@
{
console_verbose();
spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
printk("%s: %04lx\n", str, err & 0xffff);
show_registers(regs);
-
+ bust_spinlocks(0);
spin_unlock_irq(&die_lock);
do_exit(SIGSEGV);
}
@@ -414,8 +415,9 @@
* here too!]
*/

- static unsigned int last_irq_sums [NR_CPUS],
+ static unsigned int last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];
+ static unsigned char printed_message [NR_CPUS];

/*
* Since current-> is always on the stack, and we always switch
@@ -432,17 +434,19 @@
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*HZ) {
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- bust_spinlocks();
- printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
- show_registers(regs);
- printk("console shuts up ...\n");
- console_silent();
- spin_unlock(&nmi_print_lock);
+ if (printed_message[cpu] == 0) {
+ printed_message[cpu] = 1;
+ spin_lock(&nmi_print_lock);
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ bust_spinlocks(1);
+ printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
+ show_registers(regs);
+ spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
+ }
do_exit(SIGSEGV);
}
} else {
--- linux-2.4.0-test11-pre4/arch/i386/kernel/i386_ksyms.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/kernel/i386_ksyms.c Tue Nov 14 21:02:18 2000
@@ -157,3 +157,5 @@
#ifdef CONFIG_X86_PAE
EXPORT_SYMBOL(empty_zero_page);
#endif
+
+EXPORT_SYMBOL(do_BUG);
--- linux-2.4.0-test11-pre4/drivers/char/console.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/drivers/char/console.c Tue Nov 14 21:02:18 2000
@@ -2135,6 +2135,7 @@
}
}
set_cursor(currcons);
+ poke_blanked_console();

quit:
clear_bit(0, &printing);
@@ -2661,7 +2662,7 @@
return;
}
console_timer.function = blank_screen;
- if (blankinterval) {
+ if (blankinterval && !oops_in_progress) {
mod_timer(&console_timer, jiffies + blankinterval);
}

@@ -2683,14 +2684,18 @@

void poke_blanked_console(void)
{
- del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
- if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
- return;
- if (console_blanked) {
- console_timer.function = unblank_screen_t;
- mod_timer(&console_timer, jiffies); /* Now */
- } else if (blankinterval) {
- mod_timer(&console_timer, jiffies + blankinterval);
+ if (oops_in_progress) {
+ unblank_screen(); /* Just do it */
+ } else {
+ del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
+ if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
+ return;
+ if (console_blanked) {
+ console_timer.function = unblank_screen_t;
+ mod_timer(&console_timer, jiffies); /* Now */
+ } else if (blankinterval) {
+ mod_timer(&console_timer, jiffies + blankinterval);
+ }
}
}


2000-11-14 16:41:02

by Pawel Kot

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks


Hi,

The whole oops produced by the 2.4.0-test11pre4 with this patch is in the
attachment. Other system info is tehere as well.

Oops is produced when runnning big-tables test from mysql sql-bench.
It's reproducable. The machine is running only mysqld.

regards
pkot
--
mailto:[email protected]
http://urtica.linuxnews.pl/~pkot/
http://newsreader.linuxnews.pl/


Attachments:
oops1.ksyms (8.88 kB)
cpuinfo (1.65 kB)
pci (6.20 kB)
iomem (1.12 kB)
ioports (603.00 B)
Download all attachments

2000-11-15 09:39:47

by Ingo Molnar

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks


On Tue, 14 Nov 2000, Andrew Morton wrote:

> It also changes the x86 NMI oopser so that it no longer shuts the
> console up after the first NMI oops. Instead, each CPU is allowed
> to print out NMI diagnostics a single time per reboot.

this is not how the NMI oopser works. It does *not* shut down the console
- you can still see everything in 'dmesg'. If you want to see it on the
console again, you can do 'dmesg -n 8'. Adding a 'once per reboot'
restriction is unreasonable - there are user-space applications that can
be terminated via the NMI-oopser safely. In 99% of the cases it's the
first oops that counts, and most people do not have serial console set up,
so writing the first oops down from screen is important.

Ingo

2000-11-15 11:38:18

by Andrew Morton

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks

Ingo Molnar wrote:
>
> On Tue, 14 Nov 2000, Andrew Morton wrote:
>
> > It also changes the x86 NMI oopser so that it no longer shuts the
> > console up after the first NMI oops. Instead, each CPU is allowed
> > to print out NMI diagnostics a single time per reboot.
>
> this is not how the NMI oopser works. It does *not* shut down the console
> - you can still see everything in 'dmesg'. If you want to see it on the
> console again, you can do 'dmesg -n 8'. Adding a 'once per reboot'
> restriction is unreasonable - there are user-space applications that can
> be terminated via the NMI-oopser safely.

The patch addresses that - it still calls do_exit() for every NMI timeout.

> In 99% of the cases it's the
> first oops that counts, and most people do not have serial console set up,
> so writing the first oops down from screen is important.

Good point. Problem is, we're getting some reported lockups in which we
need to know what the other CPUs are doing (the other 1%). If a critical
spinlock is stuck on, we don't get to type `dmesg'.

So... In this updated patch the console_silent() call remains
as you designed it, but the nmi_watchdog kernel boot parameter has
been overloaded so that

nmi_watchdog=2

will now cause _all_ NMI oops messages to be displayed on the console.

Is this OK?



--- linux-2.4.0-test11-pre4/include/linux/kernel.h Sun Oct 15 01:27:46 2000
+++ linux-akpm/include/linux/kernel.h Wed Nov 15 20:31:29 2000
@@ -62,6 +62,7 @@

asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+extern int oops_in_progress;

#if DEBUG
#define pr_debug(fmt,arg...) \
--- linux-2.4.0-test11-pre4/include/asm-i386/page.h Sat Nov 4 16:22:49 2000
+++ linux-akpm/include/asm-i386/page.h Wed Nov 15 20:31:29 2000
@@ -88,8 +88,9 @@
* Tell the user there is some problem. Beep too, so we can
* see^H^H^Hhear bugs in early bootup as well!
*/
+extern void do_BUG(const char *file, int line);
#define BUG() do { \
- printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ do_BUG(__FILE__, __LINE__); \
__asm__ __volatile__(".byte 0x0f,0x0b"); \
} while (0)

--- linux-2.4.0-test11-pre4/kernel/printk.c Sat Nov 4 16:22:49 2000
+++ linux-akpm/kernel/printk.c Wed Nov 15 20:31:29 2000
@@ -51,6 +51,7 @@
static unsigned long logged_chars;
struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
+int oops_in_progress;

/*
* Setup a list of consoles. Called from init/main.c
@@ -260,6 +261,8 @@
static signed char msg_level = -1;
long flags;

+ if (oops_in_progress)
+ spin_lock_init(&console_lock);
spin_lock_irqsave(&console_lock, flags);
va_start(args, fmt);
i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
@@ -308,7 +311,8 @@
msg_level = -1;
}
spin_unlock_irqrestore(&console_lock, flags);
- wake_up_interruptible(&log_wait);
+ if (!oops_in_progress)
+ wake_up_interruptible(&log_wait);
return i;
}

--- linux-2.4.0-test11-pre4/arch/i386/mm/fault.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/mm/fault.c Wed Nov 15 20:31:29 2000
@@ -77,17 +77,40 @@
return 0;
}

-extern spinlock_t console_lock, timerlist_lock;
+#ifdef CONFIG_SMP
+extern unsigned volatile int global_irq_lock;
+#endif

/*
* Unlock any spinlocks which will prevent us from getting the
- * message out (timerlist_lock is aquired through the
- * console unblank code)
+ * message out and tell the printk/console paths that an emergency
+ * message is coming through
+ */
+void bust_spinlocks(int yes)
+{
+ if (yes) {
+ oops_in_progress = 1;
+#ifdef CONFIG_SMP
+ global_irq_lock = 0; /* Many serial drivers do __global_cli() */
+#endif
+ } else {
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give syslogd
+ * a poke. Hold onto your hats...
+ */
+ printk("");
+ }
+}
+
+/*
+ * Called from the BUG() macro.
*/
-void bust_spinlocks(void)
+void do_BUG(const char *file, int line)
{
- spin_lock_init(&console_lock);
- spin_lock_init(&timerlist_lock);
+ bust_spinlocks(1);
+ printk("kernel BUG at %s:%d!\n", file, line);
}

asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
@@ -264,8 +287,7 @@
* terminate things with extreme prejudice.
*/

- bust_spinlocks();
-
+ bust_spinlocks(1);
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
else
@@ -283,6 +305,7 @@
printk(KERN_ALERT "*pte = %08lx\n", page);
}
die("Oops", regs, error_code);
+ bust_spinlocks(0);
do_exit(SIGKILL);

/*
--- linux-2.4.0-test11-pre4/arch/i386/kernel/traps.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/kernel/traps.c Wed Nov 15 21:57:49 2000
@@ -63,7 +63,7 @@
struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };

extern int console_loglevel;
-extern void bust_spinlocks(void);
+extern void bust_spinlocks(int yes);

static inline void console_silent(void)
{
@@ -209,9 +209,10 @@
{
console_verbose();
spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
printk("%s: %04lx\n", str, err & 0xffff);
show_registers(regs);
-
+ bust_spinlocks(0);
spin_unlock_irq(&die_lock);
do_exit(SIGSEGV);
}
@@ -414,7 +415,7 @@
* here too!]
*/

- static unsigned int last_irq_sums [NR_CPUS],
+ static unsigned int last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];

/*
@@ -437,12 +438,15 @@
* We are in trouble anyway, lets at least try
* to get a message out.
*/
- bust_spinlocks();
+ bust_spinlocks(1);
printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
show_registers(regs);
- printk("console shuts up ...\n");
- console_silent();
+ if (nmi_watchdog <= 1) {
+ printk("Console shuts up. Use `dmesg' or boot with `nmi_watchdog=2' to see more logs.\n");
+ console_silent();
+ }
spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
do_exit(SIGSEGV);
}
} else {
--- linux-2.4.0-test11-pre4/arch/i386/kernel/i386_ksyms.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/arch/i386/kernel/i386_ksyms.c Wed Nov 15 20:31:29 2000
@@ -157,3 +157,5 @@
#ifdef CONFIG_X86_PAE
EXPORT_SYMBOL(empty_zero_page);
#endif
+
+EXPORT_SYMBOL(do_BUG);
--- linux-2.4.0-test11-pre4/drivers/char/console.c Mon Nov 13 18:23:49 2000
+++ linux-akpm/drivers/char/console.c Wed Nov 15 20:31:29 2000
@@ -2135,6 +2135,7 @@
}
}
set_cursor(currcons);
+ poke_blanked_console();

quit:
clear_bit(0, &printing);
@@ -2661,7 +2662,7 @@
return;
}
console_timer.function = blank_screen;
- if (blankinterval) {
+ if (blankinterval && !oops_in_progress) {
mod_timer(&console_timer, jiffies + blankinterval);
}

@@ -2683,14 +2684,18 @@

void poke_blanked_console(void)
{
- del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
- if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
- return;
- if (console_blanked) {
- console_timer.function = unblank_screen_t;
- mod_timer(&console_timer, jiffies); /* Now */
- } else if (blankinterval) {
- mod_timer(&console_timer, jiffies + blankinterval);
+ if (oops_in_progress) {
+ unblank_screen(); /* Just do it */
+ } else {
+ del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
+ if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
+ return;
+ if (console_blanked) {
+ console_timer.function = unblank_screen_t;
+ mod_timer(&console_timer, jiffies); /* Now */
+ } else if (blankinterval) {
+ mod_timer(&console_timer, jiffies + blankinterval);
+ }
}
}

--- linux-2.4.0-test11-pre4/Documentation/nmi_watchdog.txt Thu Aug 24 21:07:14 2000
+++ linux-akpm/Documentation/nmi_watchdog.txt Wed Nov 15 21:51:17 2000
@@ -24,8 +24,21 @@
cannot even accept NMI interrupts, or the crash has made the kernel
unable to print messages.

-NOTE: currently the NMI-oopser is enabled unconditionally on x86 SMP
-boxes.
+The NMI-oopser defaults to `1' (enabled) on x86 SMP boxes.
+
+The NMI-oopser only displays the first oops message on the console. This
+is to prevent vital information from scrolling off the scren if more
+errors occur. You can find additional information (such as NMI oops
+information from other CPUs) by looking in your system logs or with
+the `dmesg' command.
+
+If your system is crashing hard and preventing you from running `dmesg'
+you can tell the NMI oopser to display all NMI oops messages (not just
+the first one) by setting
+
+ nmi_watchdog=2
+
+on the kernel boot command line or in lilo.conf, as shown above.

[ feel free to send bug reports, suggestions and patches to
Ingo Molnar <[email protected]> or the Linux SMP mailing

2000-11-15 11:44:17

by Ingo Molnar

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks


On Wed, 15 Nov 2000, Andrew Morton wrote:

> [...] Problem is, we're getting some reported lockups in which we need
> to know what the other CPUs are doing (the other 1%). If a critical
> spinlock is stuck on, we don't get to type `dmesg'.

i know - but we cannot please everyone, so i went for the 99% :-)

> So... In this updated patch the console_silent() call remains
> as you designed it, but the nmi_watchdog kernel boot parameter has
> been overloaded so that
>
> nmi_watchdog=2
>
> will now cause _all_ NMI oops messages to be displayed on the console.
>
> Is this OK?

this collides with the UP-IOAPIC path's use of nmi_watchdog == 2 ...

i'd rather suggest a cleaner, "nmi_watchdog=2,verbose" (default: silent)
type of boot parameter, it looks like the NMI watchdog needs more
parameters.

Ingo

2000-11-15 11:47:37

by Andrew Morton

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks

"Pawe? Kot" wrote:
>
> Hi,
>
> The whole oops produced by the 2.4.0-test11pre4 with this patch is in the
> attachment. Other system info is tehere as well.
>
> Oops is produced when runnning big-tables test from mysql sql-bench.
> It's reproducable. The machine is running only mysqld.

Thank you. Quad 700 meg Xeons? Nice machine, that.

The NMI oops changes are working well. We can see three CPUs are
stuck in different places spinning on the same lock with interrupts
disabled, but unfortunately something went wrong with your `ksymoops'
run.

Could you please rerun ksymoops, and make sure that you use the
correct symbol file? Take the `System.map' from the directory
where you built the kernel and run

ksymoops -m System.map < log_file

-

2000-11-15 12:00:39

by Pawel Kot

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks

On Wed, 15 Nov 2000, Andrew Morton wrote:

> "Pawe? Kot" wrote:
> >
> > Hi,
> >
> > The whole oops produced by the 2.4.0-test11pre4 with this patch is in the
> > attachment. Other system info is tehere as well.
> >
> > Oops is produced when runnning big-tables test from mysql sql-bench.
> > It's reproducable. The machine is running only mysqld.
>
> Thank you. Quad 700 meg Xeons? Nice machine, that.

Yes. 54 seconds for compiling kernel is quite good result :-)

> The NMI oops changes are working well. We can see three CPUs are
> stuck in different places spinning on the same lock with interrupts
> disabled, but unfortunately something went wrong with your `ksymoops'
> run.
>
> Could you please rerun ksymoops, and make sure that you use the
> correct symbol file? Take the `System.map' from the directory
> where you built the kernel and run
>
> ksymoops -m System.map < log_file

In attachment. But don't beat me. I think I found the oops reason.
/dev/shm was not mounted. After mounting it I couldn't get an oops yet.
Could this be a reason?

regards and thanks for help
pkot
--
mailto:[email protected]
http://urtica.linuxnews.pl/~pkot/
http://newsreader.linuxnews.pl/


Attachments:
oops2.ksyms (8.36 kB)

2000-11-15 12:09:31

by Andrew Morton

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks

"Pawe? Kot" wrote:
>
> In attachment. But don't beat me. I think I found the oops reason.
> /dev/shm was not mounted. After mounting it I couldn't get an oops yet.
> Could this be a reason?

No.

>>EIP; c010b5ee <__read_lock_failed+6/18> <=====
Trace; c02026f7 <stext_lock+86b/8c14>
Trace; c0120404 <sys_sched_setscheduler+14/18>
^^^^^^^^^^^^^^^^^^^^^^

This was fixed in test11-pre5. Please retest with that kernel.

Thanks again.

-

2000-11-15 13:53:50

by Andrew Morton

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks

Ingo Molnar wrote:
>
> > So... In this updated patch the console_silent() call remains
> > as you designed it, but the nmi_watchdog kernel boot parameter has
> > been overloaded so that
> >
> > nmi_watchdog=2
> >
> > will now cause _all_ NMI oops messages to be displayed on the console.
> >
> > Is this OK?
>
> this collides with the UP-IOAPIC path's use of nmi_watchdog == 2 ...
>
> i'd rather suggest a cleaner, "nmi_watchdog=2,verbose" (default: silent)
> type of boot parameter, it looks like the NMI watchdog needs more
> parameters.


OK, so the options are now:

nmi_watchdog=0
nmi_watchdog=1
nmi_watchdog=1,verbose


Third time lucky?



--- linux-2.4.0-test11-pre5/include/linux/kernel.h Sun Oct 15 01:27:46 2000
+++ linux-akpm/include/linux/kernel.h Wed Nov 15 23:25:48 2000
@@ -62,6 +62,7 @@

asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+extern int oops_in_progress;

#if DEBUG
#define pr_debug(fmt,arg...) \
--- linux-2.4.0-test11-pre5/include/asm-i386/page.h Sat Nov 4 16:22:49 2000
+++ linux-akpm/include/asm-i386/page.h Wed Nov 15 23:27:32 2000
@@ -88,8 +88,9 @@
* Tell the user there is some problem. Beep too, so we can
* see^H^H^Hhear bugs in early bootup as well!
*/
+extern void do_BUG(const char *file, int line);
#define BUG() do { \
- printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ do_BUG(__FILE__, __LINE__); \
__asm__ __volatile__(".byte 0x0f,0x0b"); \
} while (0)

--- linux-2.4.0-test11-pre5/kernel/printk.c Sat Nov 4 16:22:49 2000
+++ linux-akpm/kernel/printk.c Wed Nov 15 23:25:48 2000
@@ -51,6 +51,7 @@
static unsigned long logged_chars;
struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
+int oops_in_progress;

/*
* Setup a list of consoles. Called from init/main.c
@@ -251,6 +252,9 @@
return do_syslog(type, buf, len);
}

+/*
+ * printk("") may be used to wake up klogd
+ */
asmlinkage int printk(const char *fmt, ...)
{
va_list args;
@@ -260,6 +264,8 @@
static signed char msg_level = -1;
long flags;

+ if (oops_in_progress)
+ spin_lock_init(&console_lock);
spin_lock_irqsave(&console_lock, flags);
va_start(args, fmt);
i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
@@ -308,7 +314,8 @@
msg_level = -1;
}
spin_unlock_irqrestore(&console_lock, flags);
- wake_up_interruptible(&log_wait);
+ if (!oops_in_progress)
+ wake_up_interruptible(&log_wait);
return i;
}

--- linux-2.4.0-test11-pre5/arch/i386/mm/fault.c Wed Nov 15 23:00:16 2000
+++ linux-akpm/arch/i386/mm/fault.c Thu Nov 16 00:14:37 2000
@@ -77,17 +77,37 @@
return 0;
}

-extern spinlock_t console_lock, timerlist_lock;
+#ifdef CONFIG_SMP
+extern unsigned volatile int global_irq_lock;
+#endif

/*
* Unlock any spinlocks which will prevent us from getting the
- * message out (timerlist_lock is aquired through the
- * console unblank code)
+ * message out and tell the printk/console paths that an emergency
+ * message is coming through
*/
-void bust_spinlocks(void)
+void bust_spinlocks(int yes)
+{
+ if (yes) {
+ oops_in_progress = 1;
+#ifdef CONFIG_SMP
+ global_irq_lock = 0; /* Many serial drivers do __global_cli() */
+#endif
+ } else {
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ printk("");
+ }
+}
+
+void do_BUG(const char *file, int line)
{
- spin_lock_init(&console_lock);
- spin_lock_init(&timerlist_lock);
+ bust_spinlocks(1);
+ printk("kernel BUG at %s:%d!\n", file, line);
}

asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
@@ -264,8 +284,7 @@
* terminate things with extreme prejudice.
*/

- bust_spinlocks();
-
+ bust_spinlocks(1);
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
else
@@ -283,6 +302,7 @@
printk(KERN_ALERT "*pte = %08lx\n", page);
}
die("Oops", regs, error_code);
+ bust_spinlocks(0);
do_exit(SIGKILL);

/*
--- linux-2.4.0-test11-pre5/arch/i386/kernel/traps.c Wed Nov 15 23:00:16 2000
+++ linux-akpm/arch/i386/kernel/traps.c Wed Nov 15 23:49:00 2000
@@ -63,7 +63,7 @@
struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };

extern int console_loglevel;
-extern void bust_spinlocks(void);
+extern void bust_spinlocks(int yes);

static inline void console_silent(void)
{
@@ -209,9 +209,10 @@
{
console_verbose();
spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
printk("%s: %04lx\n", str, err & 0xffff);
show_registers(regs);
-
+ bust_spinlocks(0);
spin_unlock_irq(&die_lock);
do_exit(SIGSEGV);
}
@@ -386,10 +387,14 @@
#if CONFIG_X86_IO_APIC

int nmi_watchdog = 1;
+static int nmi_watchdog_verbose;

static int __init setup_nmi_watchdog(char *str)
{
- get_option(&str, &nmi_watchdog);
+ if (get_option(&str, &nmi_watchdog) == 2) {
+ if (strcmp(str, "verbose") == 0)
+ nmi_watchdog_verbose = 1;
+ }
return 1;
}

@@ -414,7 +419,7 @@
* here too!]
*/

- static unsigned int last_irq_sums [NR_CPUS],
+ static unsigned int last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];

/*
@@ -437,12 +442,15 @@
* We are in trouble anyway, lets at least try
* to get a message out.
*/
- bust_spinlocks();
+ bust_spinlocks(1);
printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
show_registers(regs);
- printk("console shuts up ...\n");
- console_silent();
+ if (nmi_watchdog_verbose == 0) {
+ printk("Console shuts up. Run dmesg or boot with `nmi_watchdog=1,verbose' for more logs\n");
+ console_silent();
+ }
spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
do_exit(SIGSEGV);
}
} else {
--- linux-2.4.0-test11-pre5/arch/i386/kernel/i386_ksyms.c Wed Nov 15 23:00:16 2000
+++ linux-akpm/arch/i386/kernel/i386_ksyms.c Wed Nov 15 23:25:48 2000
@@ -157,3 +157,5 @@
#ifdef CONFIG_X86_PAE
EXPORT_SYMBOL(empty_zero_page);
#endif
+
+EXPORT_SYMBOL(do_BUG);
--- linux-2.4.0-test11-pre5/drivers/char/console.c Wed Nov 15 23:00:16 2000
+++ linux-akpm/drivers/char/console.c Wed Nov 15 23:25:48 2000
@@ -2135,6 +2135,7 @@
}
}
set_cursor(currcons);
+ poke_blanked_console();

quit:
clear_bit(0, &printing);
@@ -2661,7 +2662,7 @@
return;
}
console_timer.function = blank_screen;
- if (blankinterval) {
+ if (blankinterval && !oops_in_progress) {
mod_timer(&console_timer, jiffies + blankinterval);
}

@@ -2683,14 +2684,18 @@

void poke_blanked_console(void)
{
- del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
- if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
- return;
- if (console_blanked) {
- console_timer.function = unblank_screen_t;
- mod_timer(&console_timer, jiffies); /* Now */
- } else if (blankinterval) {
- mod_timer(&console_timer, jiffies + blankinterval);
+ if (oops_in_progress) {
+ unblank_screen(); /* Just do it */
+ } else {
+ del_timer(&console_timer); /* Can't use _sync here: called from tasklet */
+ if (vt_cons[fg_console]->vc_mode == KD_GRAPHICS)
+ return;
+ if (console_blanked) {
+ console_timer.function = unblank_screen_t;
+ mod_timer(&console_timer, jiffies); /* Now */
+ } else if (blankinterval) {
+ mod_timer(&console_timer, jiffies + blankinterval);
+ }
}
}

--- linux-2.4.0-test11-pre5/Documentation/nmi_watchdog.txt Thu Aug 24 21:07:14 2000
+++ linux-akpm/Documentation/nmi_watchdog.txt Wed Nov 15 23:25:48 2000
@@ -24,8 +24,21 @@
cannot even accept NMI interrupts, or the crash has made the kernel
unable to print messages.

-NOTE: currently the NMI-oopser is enabled unconditionally on x86 SMP
-boxes.
+The NMI-oopser defaults to `1' (enabled) on x86 SMP boxes.
+
+The NMI-oopser only displays the first oops message on the console. This
+is to prevent vital information from scrolling off the scren if more
+errors occur. You can find additional information (such as NMI oops
+information from other CPUs) by looking in your system logs or with
+the `dmesg' command.
+
+If your system is crashing hard and preventing you from running `dmesg'
+you can tell the NMI oopser to display all NMI oops messages (not just
+the first one) by setting
+
+ nmi_watchdog=1,verbose
+
+on the kernel boot command line or in lilo.conf, as shown above.

[ feel free to send bug reports, suggestions and patches to
Ingo Molnar <[email protected]> or the Linux SMP mailing

2000-11-15 13:56:10

by Ingo Molnar

[permalink] [raw]
Subject: Re: [prepatch] removal of oops->printk deadlocks


On Thu, 16 Nov 2000, Andrew Morton wrote:

> OK, so the options are now:
>
> nmi_watchdog=0
> nmi_watchdog=1
> nmi_watchdog=1,verbose

looks good for me!

Ingo