2008-01-25 14:54:57

by Bodo Eggert

[permalink] [raw]
Subject: [PATCH] Introduce softpanic

Enabling this option changes a hard panic on boot errors to a
soft panic, which does not stop the system completely.
You can still scroll the screen and read the messages.

Signed-Off-By: Bodo Eggert <[email protected]>

diff -pruN -X dontdiff linux-2.6.24.pure/include/linux/kernel.h linux-2.6.24.softpanic/include/linux/kernel.h
--- linux-2.6.24.pure/include/linux/kernel.h 2008-01-25 15:09:36.000000000 +0100
+++ linux-2.6.24.softpanic/include/linux/kernel.h 2008-01-25 15:31:26.000000000 +0100
@@ -130,6 +130,12 @@ extern struct atomic_notifier_head panic
extern long (*panic_blink)(long time);
NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+#ifdef SOFTPANIC
+NORET_TYPE void softpanic(const char *fmt, ...)
+ __attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+#else
+# define softpanic(...) do { panic(__VA_ARGS__); } while (0)
+#endif
extern void oops_enter(void);
extern void oops_exit(void);
extern int oops_may_print(void);
diff -pruN -X dontdiff linux-2.6.24.pure/init/Kconfig linux-2.6.24.softpanic/init/Kconfig
--- linux-2.6.24.pure/init/Kconfig 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/init/Kconfig 2008-01-25 15:15:08.000000000 +0100
@@ -526,6 +526,14 @@ config BUG
option for embedded systems with no facilities for reporting errors.
Just say Y.

+config SOFTPANIC
+ bool "Enable softpanic for boot errors" if EMBEDDED
+ default y
+ help
+ Enabling this option changes a hard panic on boot errors to a
+ soft panic, which does not stop the system completely.
+ You can still scroll the screen and read the messages.
+
config ELF_CORE
default y
bool "Enable ELF core dumps" if EMBEDDED
diff -pruN -X dontdiff linux-2.6.24.pure/init/do_mounts.c linux-2.6.24.softpanic/init/do_mounts.c
--- linux-2.6.24.pure/init/do_mounts.c 2008-01-25 15:08:31.000000000 +0100
+++ linux-2.6.24.softpanic/init/do_mounts.c 2008-01-25 15:15:08.000000000 +0100
@@ -330,7 +330,7 @@ retry:
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");

printk_all_partitions();
- panic("VFS: Unable to mount root fs on %s", b);
+ softpanic("VFS: Unable to mount root fs on %s", b);
}

printk("List of all partitions:\n");
@@ -342,7 +342,7 @@ retry:
#ifdef CONFIG_BLOCK
__bdevname(ROOT_DEV, b);
#endif
- panic("VFS: Unable to mount root fs on %s", b);
+ softpanic("VFS: Unable to mount root fs on %s", b);
out:
putname(fs_names);
}
diff -pruN -X dontdiff linux-2.6.24.pure/init/main.c linux-2.6.24.softpanic/init/main.c
--- linux-2.6.24.pure/init/main.c 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/init/main.c 2008-01-25 15:15:08.000000000 +0100
@@ -585,7 +585,7 @@ asmlinkage void __init start_kernel(void
*/
console_init();
if (panic_later)
- panic(panic_later, panic_param);
+ softpanic(panic_later, panic_param);

lockdep_info();

@@ -800,7 +800,7 @@ static int noinline init_post(void)
run_init_process("/bin/init");
run_init_process("/bin/sh");

- panic("No init found. Try passing init= option to kernel.");
+ softpanic("No init found. Try passing init= option to kernel.");
}

static int __init kernel_init(void * unused)
diff -pruN -X dontdiff linux-2.6.24.pure/kernel/panic.c linux-2.6.24.softpanic/kernel/panic.c
--- linux-2.6.24.pure/kernel/panic.c 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/kernel/panic.c 2008-01-25 15:38:52.000000000 +0100
@@ -142,6 +142,66 @@ NORET_TYPE void panic(const char * fmt,

EXPORT_SYMBOL(panic);

+#ifdef SOFTPANIC
+NORET_TYPE void softpanic(const char *fmt, ...)
+{
+ long i;
+ static char buf[1024];
+ va_list args;
+#if defined(CONFIG_S390)
+ unsigned long caller = (unsigned long) __builtin_return_address(0);
+#endif
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ printk(KERN_EMERG "Kernel panic - not syncing: %s\n", buf);
+
+ atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+
+ if (!panic_blink)
+ panic_blink = no_blink;
+
+ if (panic_timeout > 0) {
+ /*
+ * Delay timeout seconds before rebooting the machine.
+ * We can't use the "normal" timers since we just panicked..
+ */
+ printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
+ for (i = 0; i < panic_timeout*1000; ) {
+ touch_nmi_watchdog();
+ i += panic_blink(i);
+ mdelay(1);
+ i++;
+ }
+ /* This will not be a clean reboot, with everything
+ * shutting down. But if there is a chance of
+ * rebooting the system it will be rebooted.
+ */
+ kernel_restart(NULL);
+ }
+#ifdef __sparc__
+ {
+ extern int stop_a_enabled;
+ /* Make sure the user can actually press Stop-A (L1-A) */
+ stop_a_enabled = 1;
+ printk(KERN_EMERG
+ "Press Stop-A (L1-A) to return to the boot prom\n");
+ }
+#endif
+#if defined(CONFIG_S390)
+ disabled_wait(caller);
+#endif
+ for (i = 0;;) {
+ touch_softlockup_watchdog();
+ i += panic_blink(i);
+ msleep(1);
+ i++;
+ }
+}
+EXPORT_SYMBOL(softpanic);
+#endif
+
/**
* print_tainted - return a string to represent the kernel taint state.
*
--
Top 100 things you don't want the sysadmin to say:
21. where did you say those backup tapes were kept?


2008-01-25 15:01:38

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

Bodo Eggert <[email protected]> writes:

> Enabling this option changes a hard panic on boot errors to a
> soft panic, which does not stop the system completely.
> You can still scroll the screen and read the messages.

I don't think it's a good idea to keep the network running in the
soft panic. A lot of people have setups that use ping as a watchdog,
and with nfsroot/ip=dhcp ping does work quite well before
mounting root and then the watchdog might not pick up the
soft panic.

Using a polled keyboard driver after panic seems to be the better
option to me, but if you want softpanic you should probably
at least add a suitable panic notifier to the network stack
to shut it all down.

-Andi

2008-01-25 15:21:19

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic


On Jan 25 2008 15:54, Bodo Eggert wrote:
>+#ifdef SOFTPANIC

#ifdef CONFIG_SOFTPANIC?

2008-01-25 15:23:03

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic


On Jan 25 2008 16:01, Andi Kleen wrote:
>Bodo Eggert <[email protected]> writes:
>
>> Enabling this option changes a hard panic on boot errors to a
>> soft panic, which does not stop the system completely.
>> You can still scroll the screen and read the messages.
>
>I don't think it's a good idea to keep the network running in the
>soft panic. A lot of people have set ups that use ping was a watchdog
>and with nfsroot/ip=dhcp ping does work quite well before
>mounting root and then the watchdog might not pick up the
>soft panic.

If the machine goes oops and softlocks (e.g. bug in root filesystem,
lock held but not released, box essentially dead), ping also still
works even though you'd probably want to trigger the watchdog too.

>Using a polled keyboard driver after panic seems to be the better
>option to me, but if you want softpanic you should probably
>at least add a suitable panic notifier to the network stack
>to shut it all down.

or that.

2008-01-25 15:26:56

by Jiri Kosina

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

On Fri, 25 Jan 2008, Bodo Eggert wrote:

> Enabling this option changes a hard panic on boot errors to a
> soft panic, which does not stop the system completely.
> You can still scroll the screen and read the messages.

Hi Bodo,

if you are willing to put some effort into the ability to scroll the
console after panic, it might be worthwhile to look into making the keyboard
driver work even after panic (i.e. poll the keyboard, rather than relying
on interrupts).

That has significantly smaller potential of breaking because the system is
expected to be dead after panic (watchdogs, broken data structures in
memory, etc).

--
Jiri Kosina

2008-01-25 17:17:28

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

On Fri, 25 Jan 2008 16:26:45 +0100, Jiri Kosina said:
> On Fri, 25 Jan 2008, Bodo Eggert wrote:
>
> > Enabling this option changes a hard panic on boot errors to a
> > soft panic, which does not stop the system completely.
> > You can still scroll the screen and read the messages.
>
> Hi Bodo,
>
> if you are willing to put some effort into the ability to scroll the
> console after panic, it might be worthwile to look making the keyboard
> driver work even after panic (i.e. poll the keyboard, rather than relying
> on interrupts).
>
> That has significantly smaller potential of breaking because the system is
> expected to be dead after panic (watchdogs, broken data structures in
> memory, etc).

I think Bodo is trying to address one specific (but quite common) corner
case, where we currently panic() only because there's nothing else that
makes sense - if we can't find the root fs, we're hosed. But (barring *other*
oops/etc issues), we're still up and running, our data structures are intact,
and there's no real reason we can't use such kernel services as we want (such
as the screen scroller) to help the sysadmin figure out what he misconfigured.

If we were trying to do a *general case* handler for post-panic, yes, we'd
want to do the polled keyboard and all that....


Attachments:
(No filename) (226.00 B)

2008-01-25 17:32:09

by Bodo Eggert

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

On Fri, 25 Jan 2008, Andi Kleen wrote:
> Bodo Eggert <[email protected]> writes:

> > Enabling this option changes a hard panic on boot errors to a
> > soft panic, which does not stop the system completely.
> > You can still scroll the screen and read the messages.
>
> I don't think it's a good idea to keep the network running in the
> soft panic. A lot of people have set ups that use ping was a watchdog
> and with nfsroot/ip=dhcp ping does work quite well before
> mounting root and then the watchdog might not pick up the
> soft panic.

> Using a polled keyboard driver after panic seems to be the better
> option to me, but if you want softpanic you should probably
> at least add a suitable panic notifier to the network stack
> to shut it all down.

I have no idea how to do it. If somebody has a big red arrow pointing to
a HOWTO, I can give it a try.

OTOH, I think the panic timeout should do the job nicely.
--
If you talk about race, it does not make you a racist. If you see distinctions
between the genders, it does not make you a sexist. If you think critically
about a denomination, it does not make you anti-religion. If you accept but
don't celebrate homosexuality, it does not make you a homophobe. -- Charlton Heston

2008-01-25 17:37:42

by Bodo Eggert

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

On Fri, 25 Jan 2008, Jan Engelhardt wrote:
> On Jan 25 2008 15:54, Bodo Eggert wrote:

> >+#ifdef SOFTPANIC
>
> #ifdef CONFIG_SOFTPANIC?

Thanks. I remember having fixed it ...
--
Professionals are predictable, it's the amateurs that are dangerous.

2008-01-28 19:07:29

by Bodo Eggert

[permalink] [raw]
Subject: [PATCH] Introduce softpanic V.2

Enabling this option changes a hard panic on boot errors to a
soft panic, which does not stop the system completely.
You can still scroll the screen and read the messages.

Signed-Off-By: Bodo Eggert <[email protected]>

---

Fixed: s/SOFTPANIC/CONFIG_SOFTPANIC/

I did not implement shutting down the network on panic, which was requested
to let the watchdog reboot the machine. For this purpose, you should use
"panic=$n".


diff -pruN -X dontdiff linux-2.6.24.pure/include/linux/kernel.h linux-2.6.24.softpanic/include/linux/kernel.h
--- linux-2.6.24.pure/include/linux/kernel.h 2008-01-25 15:09:36.000000000 +0100
+++ linux-2.6.24.softpanic/include/linux/kernel.h 2008-01-25 15:31:26.000000000 +0100
@@ -130,6 +130,12 @@ extern struct atomic_notifier_head panic
extern long (*panic_blink)(long time);
NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+#ifdef SOFTPANIC
+NORET_TYPE void softpanic(const char *fmt, ...)
+ __attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+#else
+# define softpanic(...) do { panic(__VA_ARGS__); } while (0)
+#endif
extern void oops_enter(void);
extern void oops_exit(void);
extern int oops_may_print(void);
diff -pruN -X dontdiff linux-2.6.24.pure/init/Kconfig linux-2.6.24.softpanic/init/Kconfig
--- linux-2.6.24.pure/init/Kconfig 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/init/Kconfig 2008-01-25 15:15:08.000000000 +0100
@@ -526,6 +526,14 @@ config BUG
option for embedded systems with no facilities for reporting errors.
Just say Y.

+config SOFTPANIC
+ bool "Enable softpanic for boot errors" if EMBEDDED
+ default y
+ help
+ Enabling this option changes a hard panic on boot errors to a
+ soft panic, which does not stop the system completely.
+ You can still scroll the screen and read the messages.
+
config ELF_CORE
default y
bool "Enable ELF core dumps" if EMBEDDED
diff -pruN -X dontdiff linux-2.6.24.pure/init/do_mounts.c linux-2.6.24.softpanic/init/do_mounts.c
--- linux-2.6.24.pure/init/do_mounts.c 2008-01-25 15:08:31.000000000 +0100
+++ linux-2.6.24.softpanic/init/do_mounts.c 2008-01-25 15:15:08.000000000 +0100
@@ -330,7 +330,7 @@ retry:
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");

printk_all_partitions();
- panic("VFS: Unable to mount root fs on %s", b);
+ softpanic("VFS: Unable to mount root fs on %s", b);
}

printk("List of all partitions:\n");
@@ -342,7 +342,7 @@ retry:
#ifdef CONFIG_BLOCK
__bdevname(ROOT_DEV, b);
#endif
- panic("VFS: Unable to mount root fs on %s", b);
+ softpanic("VFS: Unable to mount root fs on %s", b);
out:
putname(fs_names);
}
diff -pruN -X dontdiff linux-2.6.24.pure/init/main.c linux-2.6.24.softpanic/init/main.c
--- linux-2.6.24.pure/init/main.c 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/init/main.c 2008-01-25 15:15:08.000000000 +0100
@@ -585,7 +585,7 @@ asmlinkage void __init start_kernel(void
*/
console_init();
if (panic_later)
- panic(panic_later, panic_param);
+ softpanic(panic_later, panic_param);

lockdep_info();

@@ -800,7 +800,7 @@ static int noinline init_post(void)
run_init_process("/bin/init");
run_init_process("/bin/sh");

- panic("No init found. Try passing init= option to kernel.");
+ softpanic("No init found. Try passing init= option to kernel.");
}

static int __init kernel_init(void * unused)
diff -pruN -X dontdiff linux-2.6.24.pure/kernel/panic.c linux-2.6.24.softpanic/kernel/panic.c
--- linux-2.6.24.pure/kernel/panic.c 2008-01-25 15:09:38.000000000 +0100
+++ linux-2.6.24.softpanic/kernel/panic.c 2008-01-25 18:37:59.000000000 +0100
@@ -142,6 +142,66 @@ NORET_TYPE void panic(const char * fmt,

EXPORT_SYMBOL(panic);

+#ifdef CONFIG_SOFTPANIC
+NORET_TYPE void softpanic(const char *fmt, ...)
+{
+ long i;
+ static char buf[1024];
+ va_list args;
+#if defined(CONFIG_S390)
+ unsigned long caller = (unsigned long) __builtin_return_address(0);
+#endif
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ printk(KERN_EMERG "Kernel panic - not syncing: %s\n", buf);
+
+ atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+
+ if (!panic_blink)
+ panic_blink = no_blink;
+
+ if (panic_timeout > 0) {
+ /*
+ * Delay timeout seconds before rebooting the machine.
+ * We can't use the "normal" timers since we just panicked..
+ */
+ printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
+ for (i = 0; i < panic_timeout*1000; ) {
+ touch_nmi_watchdog();
+ i += panic_blink(i);
+ mdelay(1);
+ i++;
+ }
+ /* This will not be a clean reboot, with everything
+ * shutting down. But if there is a chance of
+ * rebooting the system it will be rebooted.
+ */
+ kernel_restart(NULL);
+ }
+#ifdef __sparc__
+ {
+ extern int stop_a_enabled;
+ /* Make sure the user can actually press Stop-A (L1-A) */
+ stop_a_enabled = 1;
+ printk(KERN_EMERG
+ "Press Stop-A (L1-A) to return to the boot prom\n");
+ }
+#endif
+#if defined(CONFIG_S390)
+ disabled_wait(caller);
+#endif
+ for (i = 0;;) {
+ touch_softlockup_watchdog();
+ i += panic_blink(i);
+ msleep(1);
+ i++;
+ }
+}
+EXPORT_SYMBOL(softpanic);
+#endif
+
/**
* print_tainted - return a string to represent the kernel taint state.
*
--
Top 100 things you don't want the sysadmin to say:
15. now it's funny you should ask that, because I don't know either

2008-02-02 18:07:36

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Introduce softpanic

On Fri 2008-01-25 16:01:28, Andi Kleen wrote:
> Bodo Eggert <[email protected]> writes:
>
> > Enabling this option changes a hard panic on boot errors to a
> > soft panic, which does not stop the system completely.
> > You can still scroll the screen and read the messages.
>
> I don't think it's a good idea to keep the network running in the
> soft panic. A lot of people have set ups that use ping was a watchdog
> and with nfsroot/ip=dhcp ping does work quite well before
> mounting root and then the watchdog might not pick up the
> soft panic.
>
> Using a polled keyboard driver after panic seems to be the better
> option to me, but if you want softpanic you should probably
> at least add a suitable panic notifier to the network stack
> to shut it all down.

OTOH this will allow netconsole/sysrq over it to still work after
softpanic, which is good. Let's not over-engineer it; I think the current
code is fine.

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html