2006-03-22 06:47:57

by Chris Wright

[permalink] [raw]
Subject: [RFC PATCH 26/35] Add Xen subarch reboot support

Add remote reboot capability, so that a virtual machine can be
rebooted, halted or 'powered off' by external management tools.

Signed-off-by: Ian Pratt <[email protected]>
Signed-off-by: Christian Limpach <[email protected]>
Signed-off-by: Chris Wright <[email protected]>
---
arch/i386/kernel/Makefile | 1
arch/i386/mach-xen/reboot.c | 265 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 266 insertions(+)

--- xen-subarch-2.6.orig/arch/i386/kernel/Makefile
+++ xen-subarch-2.6/arch/i386/kernel/Makefile
@@ -49,6 +49,7 @@ hw_irq-y := i8259.o

hw_irq-$(CONFIG_XEN) := ../mach-xen/evtchn.o
time-$(CONFIG_XEN) := ../mach-xen/time.o
+reboot-$(CONFIG_XEN) := ../mach-xen/reboot.o

# vsyscall.o contains the vsyscall DSO images as __initdata.
# We must build both images before we can assemble it.
--- /dev/null
+++ xen-subarch-2.6/arch/i386/mach-xen/reboot.c
@@ -0,0 +1,265 @@
+#define __KERNEL_SYSCALLS__
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stringify.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#ifdef CONFIG_XEN_XENBUS
+#include <xen/xenbus.h>
+#endif
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <xen/xencons.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+#endif
+
+#define SHUTDOWN_INVALID -1
+#define SHUTDOWN_POWEROFF 0
+#define SHUTDOWN_REBOOT 1
+#define SHUTDOWN_SUSPEND 2
+/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+ * report a crash, not be instructed to crash!
+ * HALT is the same as POWEROFF, as far as we're concerned. The tools use
+ * the distinction when we return the reason code to them.
+ */
+#define SHUTDOWN_HALT 4
+
+void machine_emergency_restart(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_reboot);
+}
+
+void machine_restart(char * __unused)
+{
+ machine_emergency_restart();
+}
+
+void machine_halt(void)
+{
+ machine_power_off();
+}
+
+void machine_power_off(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_poweroff);
+}
+
+int reboot_thru_bios = 0; /* for dmi_scan.c */
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+
+
+/******************************************************************************
+ * Stop/pickle callback handling.
+ */
+
+#ifdef CONFIG_XEN_XENBUS
+/* Ignore multiple shutdown requests. */
+static int shutting_down = SHUTDOWN_INVALID;
+static void __shutdown_handler(void *unused);
+static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
+#endif
+
+#ifdef CONFIG_XEN_XENBUS
+static int shutdown_process(void *__unused)
+{
+ static char *envp[] = { "HOME=/", "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+ static char *restart_argv[] = { "/sbin/reboot", NULL };
+ static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
+
+ extern asmlinkage long sys_reboot(int magic1, int magic2,
+ unsigned int cmd, void *arg);
+
+ daemonize("shutdown");
+
+ switch (shutting_down) {
+ case SHUTDOWN_POWEROFF:
+ case SHUTDOWN_HALT:
+ if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {
+ sys_reboot(LINUX_REBOOT_MAGIC1,
+ LINUX_REBOOT_MAGIC2,
+ LINUX_REBOOT_CMD_POWER_OFF,
+ NULL);
+ }
+ break;
+
+ case SHUTDOWN_REBOOT:
+ if (execve("/sbin/reboot", restart_argv, envp) < 0) {
+ sys_reboot(LINUX_REBOOT_MAGIC1,
+ LINUX_REBOOT_MAGIC2,
+ LINUX_REBOOT_CMD_RESTART,
+ NULL);
+ }
+ break;
+ }
+
+ shutting_down = SHUTDOWN_INVALID; /* could try again */
+
+ return 0;
+}
+
+static void __shutdown_handler(void *unused)
+{
+ int err = 0;
+
+ if (shutting_down != SHUTDOWN_SUSPEND)
+ err = kernel_thread(shutdown_process, NULL,
+ CLONE_FS | CLONE_FILES);
+
+ if (err < 0) {
+ printk(KERN_WARNING "Error creating shutdown process (%d): "
+ "retrying...\n", -err);
+ schedule_delayed_work(&shutdown_work, HZ/2);
+ }
+}
+
+static void shutdown_handler(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ char *str;
+ xenbus_transaction_t xbt;
+ int err;
+
+ if (shutting_down != SHUTDOWN_INVALID)
+ return;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ return;
+ str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
+ /* Ignore read errors and empty reads. */
+ if (XENBUS_IS_ERR_READ(str)) {
+ xenbus_transaction_end(xbt, 1);
+ return;
+ }
+
+ xenbus_write(xbt, "control", "shutdown", "");
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN) {
+ kfree(str);
+ goto again;
+ }
+
+ if (strcmp(str, "poweroff") == 0)
+ shutting_down = SHUTDOWN_POWEROFF;
+ else if (strcmp(str, "reboot") == 0)
+ shutting_down = SHUTDOWN_REBOOT;
+ else if (strcmp(str, "suspend") == 0)
+ shutting_down = SHUTDOWN_SUSPEND;
+ else if (strcmp(str, "halt") == 0)
+ shutting_down = SHUTDOWN_HALT;
+ else {
+ printk("Ignoring shutdown request: %s\n", str);
+ shutting_down = SHUTDOWN_INVALID;
+ }
+
+ if (shutting_down != SHUTDOWN_INVALID)
+ schedule_work(&shutdown_work);
+
+ kfree(str);
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
+ unsigned int len)
+{
+ char sysrq_key = '\0';
+ xenbus_transaction_t xbt;
+ int err;
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ return;
+ if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+ printk(KERN_ERR "Unable to read sysrq code in "
+ "control/sysrq\n");
+ xenbus_transaction_end(xbt, 1);
+ return;
+ }
+
+ if (sysrq_key != '\0')
+ xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+
+ if (sysrq_key != '\0') {
+ handle_sysrq(sysrq_key, NULL, NULL);
+ }
+}
+#endif
+
+static struct xenbus_watch shutdown_watch = {
+ .node = "control/shutdown",
+ .callback = shutdown_handler
+};
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static struct xenbus_watch sysrq_watch = {
+ .node ="control/sysrq",
+ .callback = sysrq_handler
+};
+#endif
+
+static struct notifier_block xenstore_notifier;
+
+static int setup_shutdown_watcher(struct notifier_block *notifier,
+ unsigned long event,
+ void *data)
+{
+ int err1 = 0;
+#ifdef CONFIG_MAGIC_SYSRQ
+ int err2 = 0;
+#endif
+
+ err1 = register_xenbus_watch(&shutdown_watch);
+#ifdef CONFIG_MAGIC_SYSRQ
+ err2 = register_xenbus_watch(&sysrq_watch);
+#endif
+
+ if (err1)
+ printk(KERN_ERR "Failed to set shutdown watcher\n");
+
+#ifdef CONFIG_MAGIC_SYSRQ
+ if (err2)
+ printk(KERN_ERR "Failed to set sysrq watcher\n");
+#endif
+
+ return NOTIFY_DONE;
+}
+
+static int __init setup_shutdown_event(void)
+{
+
+ xenstore_notifier.notifier_call = setup_shutdown_watcher;
+
+ register_xenstore_notifier(&xenstore_notifier);
+
+ return 0;
+}
+
+subsys_initcall(setup_shutdown_event);
+#endif

--


2006-03-22 08:40:40

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support

On Tue, 2006-03-21 at 22:31 -0800, Chris Wright wrote:
> +
> +#ifdef CONFIG_XEN_XENBUS
> +/* Ignore multiple shutdown requests. */
> +static int shutting_down = SHUTDOWN_INVALID;
> +static void __shutdown_handler(void *unused);
> +static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
> +#endif
> +
> +#ifdef CONFIG_XEN_XENBUS

Eh, why the re-#ifdef? The block just above is already under the same
CONFIG_XEN_XENBUS guard.


> +static int shutdown_process(void *__unused)
> +{
> + static char *envp[] = { "HOME=/", "TERM=linux",
> + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
> + static char *restart_argv[] = { "/sbin/reboot", NULL };
> + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
> +
> + extern asmlinkage long sys_reboot(int magic1, int magic2,
> + unsigned int cmd, void *arg);
> +
> + daemonize("shutdown");
> +
> + switch (shutting_down) {
> + case SHUTDOWN_POWEROFF:
> + case SHUTDOWN_HALT:
> + if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {
> + sys_reboot(LINUX_REBOOT_MAGIC1,
> + LINUX_REBOOT_MAGIC2,
> + LINUX_REBOOT_CMD_POWER_OFF,
> + NULL);
> + }
> + break;
> +
> + case SHUTDOWN_REBOOT:
> + if (execve("/sbin/reboot", restart_argv, envp) < 0) {
> + sys_reboot(LINUX_REBOOT_MAGIC1,
> + LINUX_REBOOT_MAGIC2,
> + LINUX_REBOOT_CMD_RESTART,
> + NULL);
> + }
> + break;
> + }
> +
> + shutting_down = SHUTDOWN_INVALID; /* could try again */
> +
> + return 0;
> +}

how is this function different from the generic one? If not, why aren't
you using the generic one?


> +static struct notifier_block xenstore_notifier;

what is this for? It's not exported and hardly used...


2006-03-22 10:22:29

by Keir Fraser

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support


On 22 Mar 2006, at 08:40, Arjan van de Ven wrote:

>> +static int shutdown_process(void *__unused)
>> +{
>> + static char *envp[] = { "HOME=/", "TERM=linux",
>> + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
>> + static char *restart_argv[] = { "/sbin/reboot", NULL };
>> + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
>
> how is this function different from the generic one? If not, why aren't
> you using the generic one?

The intent is to allow remote management tools to trigger a clean
shutdown of the virtual machine. That requires us to notify to
userspace, and this function does that by exec'ing one of the standard
userspace programs. Given the trigger is received by the kernel in the
first instance I don't know a better way of doing this. And if this is
the best way, I don't think there is generic code in the kernel which
does the same thing.

-- Keir

2006-03-22 10:39:49

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support

On Wed, 2006-03-22 at 10:22 +0000, Keir Fraser wrote:
> On 22 Mar 2006, at 08:40, Arjan van de Ven wrote:
>
> >> +static int shutdown_process(void *__unused)
> >> +{
> >> + static char *envp[] = { "HOME=/", "TERM=linux",
> >> + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
> >> + static char *restart_argv[] = { "/sbin/reboot", NULL };
> >> + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
> >
> > how is this function different from the generic one? If not, why aren't
> > you using the generic one?
>
> The intent is to allow remote management tools to trigger a clean
> shutdown of the virtual machine. That requires us to notify to
> userspace, and this function does that by exec'ing one of the standard
> userspace programs. Given the trigger is received by the kernel in the
> first instance I don't know a better way of doing this. And if this is
> the best way, I don't think there is generic code in the kernel which
> does the same thing.


well this isn't really different from the normal ctrl-alt-delete right?
I would strongly suggest to follow the normal ctrl-alt-del path.. that
follows the normal convention sysadmins are used to.
It's not "/sbin/poweroff" fwiw... at least not hardcoded. Following the
normal ctrl-alt-del codepath gets all the policy out of this kind of
thing as well..


2006-03-22 10:52:00

by Keir Fraser

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support


On 22 Mar 2006, at 10:39, Arjan van de Ven wrote:

>> The intent is to allow remote management tools to trigger a clean
>> shutdown of the virtual machine. That requires us to notify to
>> userspace, and this function does that by exec'ing one of the standard
>> userspace programs. Given the trigger is received by the kernel in the
>> first instance I don't know a better way of doing this. And if this is
>> the best way, I don't think there is generic code in the kernel which
>> does the same thing.
>
>
> well this isn't really different from the normal ctrl-alt-delete right?
> I would strongly suggest to follow the normal ctrl-alt-del path.. that
> follows the normal convention sysadmins are used to.
> It's not "/sbin/poweroff" fwiw... at least not hardcoded. Following the
> normal ctrl-alt-del codepath gets all the policy out of this kind of
> thing as well..

Hmm... that will work okay for reboot, where SIGINT to init is probably
a better strategy than what we do now. But we'd still need something
special for halt/shutdown. We followed the same principle for this as
sparc64/kernel/power.c.

-- Keir

2006-03-22 15:03:56

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support

On Wednesday 22 March 2006 07:31, Chris Wright wrote:
> + static char *envp[] = { "HOME=/", "TERM=linux",
> + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
> + static char *restart_argv[] = { "/sbin/reboot", NULL };
> + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };

It would be better if that was user configurable.

> + extern asmlinkage long sys_reboot(int magic1, int magic2,
> + unsigned int cmd, void *arg);

This is what linux/syscalls.h is there for.



> + daemonize("shutdown");

What is that good for?

> +
> + switch (shutting_down) {
> + case SHUTDOWN_POWEROFF:
> + case SHUTDOWN_HALT:
> + if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {

You should probably keep track of whether the execve already happened and, if it is called
again, do the sys_reboot directly.


-Andi

2006-03-26 22:41:01

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC PATCH 26/35] Add Xen subarch reboot support

On Wed 22-03-06 15:21:57, Andi Kleen wrote:
> On Wednesday 22 March 2006 07:31, Chris Wright wrote:
> > + static char *envp[] = { "HOME=/", "TERM=linux",
> > + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
> > + static char *restart_argv[] = { "/sbin/reboot", NULL };
> > + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
>
> It would be better if that was user configurable.

ACPI also needs to shut down the machine on overheat. It would be nice to
consolidate all those places. A new signal to init would be best, I'd
say.

--
Thanks, Sharp!