2007-11-19 06:59:27

by Huang, Ying

[permalink] [raw]
Subject: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

This patch implements kexec based hibernate/resume. This is based on
the facility provided by kexec_jump. The ACPI methods are called at
specified environment to conform the ACPI specification. Two new
reboot commands are added to trigger hibernate/resume.

Signed-off-by: Huang Ying <[email protected]>

---
include/linux/kexec.h | 5 +
include/linux/reboot.h | 2
include/linux/suspend.h | 9 ++
kernel/power/disk.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sys.c | 42 +++++++++++++
5 files changed, 212 insertions(+), 1 deletion(-)

--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/kexec.h>

#include "power.h"

@@ -438,6 +439,160 @@ int hibernate(void)
return error;
}

+#ifdef CONFIG_KEXEC
+static void kexec_hibernate_power_down(void)
+{
+ switch (hibernation_mode) {
+ case HIBERNATION_TEST:
+ case HIBERNATION_TESTPROC:
+ break;
+ case HIBERNATION_REBOOT:
+ machine_restart(NULL);
+ break;
+ case HIBERNATION_PLATFORM:
+ if (!hibernation_ops)
+ break;
+ hibernation_ops->enter();
+ /* We should never get here */
+ while (1);
+ break;
+ case HIBERNATION_SHUTDOWN:
+ machine_power_off();
+ break;
+ }
+ machine_halt();
+ /*
+ * Valid image is on the disk, if we continue we risk serious data
+ * corruption after resume.
+ */
+ printk(KERN_CRIT "Please power me down manually\n");
+ while (1);
+}
+
+int kexec_hibernate(struct kimage *image)
+{
+ int error;
+ int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
+ unsigned long cmd_ret;
+
+ mutex_lock(&pm_mutex);
+
+ pm_prepare_console();
+ suspend_console();
+
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ goto Resume_console;
+
+ error = platform_start(platform_mode);
+ if (error)
+ goto Resume_console;
+
+ error = device_suspend(PMSG_FREEZE);
+ if (error)
+ goto Resume_console;
+
+ error = platform_pre_snapshot(platform_mode);
+ if (error)
+ goto Resume_devices;
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto Resume_devices;
+ local_irq_disable();
+ /* At this point, device_suspend() has been called, but *not*
+ * device_power_down(). We *must* device_power_down() now.
+ * Otherwise, drivers for some devices (e.g. interrupt
+ * controllers) become desynchronized with the actual state of
+ * the hardware at resume time, and evil weirdness ensues.
+ */
+ error = device_power_down(PMSG_FREEZE);
+ if (error)
+ goto Enable_irqs;
+
+ save_processor_state();
+ error = machine_kexec_jump(image, &cmd_ret,
+ KJUMP_CMD_HIBERNATE_WRITE_IMAGE);
+ restore_processor_state();
+
+ if (cmd_ret == KJUMP_CMD_HIBERNATE_POWER_DOWN)
+ kexec_hibernate_power_down();
+
+ platform_leave(platform_mode);
+
+ /* NOTE: device_power_up() is just a resume() for devices
+ * that suspended with irqs off ... no overall powerup.
+ */
+ device_power_up();
+ Enable_irqs:
+ local_irq_enable();
+ enable_nonboot_cpus();
+ Resume_devices:
+ platform_finish(platform_mode);
+ device_resume();
+ Resume_console:
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
+ resume_console();
+ pm_restore_console();
+ mutex_unlock(&pm_mutex);
+ return error;
+}
+
+int kexec_resume(struct kimage *image)
+{
+ int error;
+ int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
+
+ mutex_lock(&pm_mutex);
+
+ pm_prepare_console();
+ suspend_console();
+
+ error = device_suspend(PMSG_PRETHAW);
+ if (error)
+ goto Resume_console;
+
+ error = platform_pre_restore(platform_mode);
+ if (error)
+ goto Resume_devices;
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto Resume_devices;
+ local_irq_disable();
+ /* At this point, device_suspend() has been called, but *not*
+ * device_power_down(). We *must* device_power_down() now.
+ * Otherwise, drivers for some devices (e.g. interrupt controllers)
+ * become desynchronized with the actual state of the hardware
+ * at resume time, and evil weirdness ensues.
+ */
+ error = device_power_down(PMSG_PRETHAW);
+ if (error)
+ goto Enable_irqs;
+
+ save_processor_state();
+ error = machine_kexec_jump(image, NULL, KJUMP_CMD_HIBERNATE_RESUME);
+ restore_processor_state();
+
+ platform_leave(platform_mode);
+
+ /* NOTE: device_power_up() is just a resume() for devices
+ * that suspended with irqs off ... no overall powerup.
+ */
+ device_power_up();
+ Enable_irqs:
+ local_irq_enable();
+ enable_nonboot_cpus();
+ Resume_devices:
+ platform_restore_cleanup(platform_mode);
+ device_resume();
+ Resume_console:
+ resume_console();
+ pm_restore_console();
+ mutex_unlock(&pm_mutex);
+ return error;
+}
+#endif

/**
* software_resume - Resume from a saved image.
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -207,6 +207,15 @@ static inline void hibernation_set_ops(s
static inline int hibernate(void) { return -ENOSYS; }
#endif /* CONFIG_HIBERNATION */

+struct kimage;
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_KEXEC)
+extern int kexec_hibernate(struct kimage *image);
+extern int kexec_resume(struct kimage *image);
+#else
+static inline int kexec_hibernate(struct kimage *image) { return -ENOSYS; }
+static inline int kexec_resume(struct kimage *image) { return -ENOSYS; }
+#endif
+
#ifdef CONFIG_PM_SLEEP
void save_processor_state(void);
void restore_processor_state(void);
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -321,6 +321,34 @@ static int kernel_kexec(unsigned long cm
return ret;
}

+static int kernel_kexec_hibernate(void)
+{
+ int ret = -ENOSYS;
+#if defined(CONFIG_KEXEC) && defined(CONFIG_HIBERNATION)
+ struct kimage *image;
+ image = xchg(&kexec_image, NULL);
+ if (!image)
+ return -EINVAL;
+ ret = kexec_hibernate(image);
+ kimage_free(image);
+#endif
+ return ret;
+}
+
+static int kernel_kexec_resume(void)
+{
+ int ret = -ENOSYS;
+#if defined(CONFIG_KEXEC) && defined(CONFIG_HIBERNATION)
+ struct kimage *image;
+ image = xchg(&kexec_image, NULL);
+ if (!image)
+ return -EINVAL;
+ ret = kexec_resume(image);
+ kimage_free(image);
+#endif
+ return ret;
+}
+
static void kernel_shutdown_prepare(enum system_states state)
{
blocking_notifier_call_chain(&reboot_notifier_list,
@@ -442,6 +470,20 @@ asmlinkage long sys_reboot(int magic1, i
}
#endif

+ case LINUX_REBOOT_CMD_KEXEC_HIBERNATE:
+ {
+ int ret = kernel_kexec_hibernate();
+ unlock_kernel();
+ return ret;
+ }
+
+ case LINUX_REBOOT_CMD_KEXEC_RESUME:
+ {
+ int ret = kernel_kexec_resume();
+ unlock_kernel();
+ return ret;
+ }
+
default:
unlock_kernel();
return -EINVAL;
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -33,6 +33,8 @@
#define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
#define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
#define LINUX_REBOOT_CMD_KEXEC 0x45584543
+#define LINUX_REBOOT_CMD_KEXEC_HIBERNATE 0xE4390DF3
+#define LINUX_REBOOT_CMD_KEXEC_RESUME 0xF5A41C2E


#ifdef __KERNEL__
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -129,7 +129,10 @@ extern int machine_kexec_jump(struct kim
unsigned long cmd);
extern int kexec_jump(struct kimage *image, unsigned long *cmd_ret,
unsigned long cmd);
-#define KJUMP_CMD_NONE 0
+#define KJUMP_CMD_NONE 0x0
+#define KJUMP_CMD_HIBERNATE_WRITE_IMAGE 0x6b630001
+#define KJUMP_CMD_HIBERNATE_POWER_DOWN 0x6b630002
+#define KJUMP_CMD_HIBERNATE_RESUME 0x6b630003
#ifdef CONFIG_COMPAT
extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
unsigned long nr_segments,


2007-11-19 18:05:38

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Monday, 19 of November 2007, Huang, Ying wrote:
> This patch implements kexec based hibernate/resume. This is based on
> the facility provided by kexec_jump. The ACPI methods are called at
> specified environment to conform the ACPI specification. Two new
> reboot commands are added to trigger hibernate/resume.
>
> Signed-off-by: Huang Ying <[email protected]>
>
> ---
> include/linux/kexec.h | 5 +
> include/linux/reboot.h | 2
> include/linux/suspend.h | 9 ++
> kernel/power/disk.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++
> kernel/sys.c | 42 +++++++++++++
> 5 files changed, 212 insertions(+), 1 deletion(-)
>
> --- a/kernel/power/disk.c
> +++ b/kernel/power/disk.c
> @@ -21,6 +21,7 @@
> #include <linux/console.h>
> #include <linux/cpu.h>
> #include <linux/freezer.h>
> +#include <linux/kexec.h>
>
> #include "power.h"
>
> @@ -438,6 +439,160 @@ int hibernate(void)
> return error;
> }
>
> +#ifdef CONFIG_KEXEC
> +static void kexec_hibernate_power_down(void)
> +{
> + switch (hibernation_mode) {
> + case HIBERNATION_TEST:
> + case HIBERNATION_TESTPROC:
> + break;
> + case HIBERNATION_REBOOT:
> + machine_restart(NULL);
> + break;
> + case HIBERNATION_PLATFORM:
> + if (!hibernation_ops)
> + break;
> + hibernation_ops->enter();

hibernation_platform_enter() should be used here (as of the current mainline).

> + /* We should never get here */
> + while (1);
> + break;
> + case HIBERNATION_SHUTDOWN:
> + machine_power_off();
> + break;
> + }
> + machine_halt();
> + /*
> + * Valid image is on the disk, if we continue we risk serious data
> + * corruption after resume.
> + */
> + printk(KERN_CRIT "Please power me down manually\n");
> + while (1);
> +}

Hm, what's the difference between the above function and power_down(),
actually?

> +
> +int kexec_hibernate(struct kimage *image)
> +{
> + int error;
> + int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
> + unsigned long cmd_ret;
> +
> + mutex_lock(&pm_mutex);
> +
> + pm_prepare_console();
> + suspend_console();
> +
> + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
> + if (error)
> + goto Resume_console;
> +
> + error = platform_start(platform_mode);
> + if (error)
> + goto Resume_console;
> +
> + error = device_suspend(PMSG_FREEZE);
> + if (error)
> + goto Resume_console;
> +
> + error = platform_pre_snapshot(platform_mode);
> + if (error)
> + goto Resume_devices;
> +
> + error = disable_nonboot_cpus();
> + if (error)
> + goto Resume_devices;

I wonder if it's viable to merge the above with hibernate() and
hibernation_snapshot() somehow, to avoid code duplication?

> + local_irq_disable();
> + /* At this point, device_suspend() has been called, but *not*
> + * device_power_down(). We *must* device_power_down() now.
> + * Otherwise, drivers for some devices (e.g. interrupt
> + * controllers) become desynchronized with the actual state of
> + * the hardware at resume time, and evil weirdness ensues.
> + */
> + error = device_power_down(PMSG_FREEZE);
> + if (error)
> + goto Enable_irqs;
> +
> + save_processor_state();
> + error = machine_kexec_jump(image, &cmd_ret,
> + KJUMP_CMD_HIBERNATE_WRITE_IMAGE);

It looks like machine_kexec_jump() simply replaces the call to
swsusp_arch_suspend() in create_image(). Is it really necessary to duplicate
all that code?

> + restore_processor_state();
> +
> + if (cmd_ret == KJUMP_CMD_HIBERNATE_POWER_DOWN)
> + kexec_hibernate_power_down();
> +
> + platform_leave(platform_mode);
> +
> + /* NOTE: device_power_up() is just a resume() for devices
> + * that suspended with irqs off ... no overall powerup.
> + */
> + device_power_up();
> + Enable_irqs:
> + local_irq_enable();
> + enable_nonboot_cpus();
> + Resume_devices:
> + platform_finish(platform_mode);
> + device_resume();
> + Resume_console:
> + pm_notifier_call_chain(PM_POST_HIBERNATION);
> + resume_console();
> + pm_restore_console();
> + mutex_unlock(&pm_mutex);
> + return error;
> +}
> +
> +int kexec_resume(struct kimage *image)
> +{
> + int error;
> + int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
> +
> + mutex_lock(&pm_mutex);
> +
> + pm_prepare_console();
> + suspend_console();
> +
> + error = device_suspend(PMSG_PRETHAW);
> + if (error)
> + goto Resume_console;
> +
> + error = platform_pre_restore(platform_mode);
> + if (error)
> + goto Resume_devices;
> +
> + error = disable_nonboot_cpus();
> + if (error)
> + goto Resume_devices;
> + local_irq_disable();
> + /* At this point, device_suspend() has been called, but *not*
> + * device_power_down(). We *must* device_power_down() now.
> + * Otherwise, drivers for some devices (e.g. interrupt controllers)
> + * become desynchronized with the actual state of the hardware
> + * at resume time, and evil weirdness ensues.
> + */
> + error = device_power_down(PMSG_PRETHAW);
> + if (error)
> + goto Enable_irqs;
> +
> + save_processor_state();
> + error = machine_kexec_jump(image, NULL, KJUMP_CMD_HIBERNATE_RESUME);

Same here. Much of the code in this function duplicates the existing code. Is
this really necessary?

> + restore_processor_state();
> +
> + platform_leave(platform_mode);
> +
> + /* NOTE: device_power_up() is just a resume() for devices
> + * that suspended with irqs off ... no overall powerup.
> + */
> + device_power_up();
> + Enable_irqs:
> + local_irq_enable();
> + enable_nonboot_cpus();
> + Resume_devices:
> + platform_restore_cleanup(platform_mode);
> + device_resume();
> + Resume_console:
> + resume_console();
> + pm_restore_console();
> + mutex_unlock(&pm_mutex);
> + return error;
> +}
> +#endif
>
> /**
> * software_resume - Resume from a saved image.

Apart from the above, there's some new debug code to be added to disk.c
in 2.6.25. It's in the ACPI test tree right now and you can get it as
individual patches from:
http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.24-rc2/patches/
(patches 10-12).

Please base your changes on top of that.

> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -207,6 +207,15 @@ static inline void hibernation_set_ops(s
> static inline int hibernate(void) { return -ENOSYS; }
> #endif /* CONFIG_HIBERNATION */
>
> +struct kimage;
> +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_KEXEC)
> +extern int kexec_hibernate(struct kimage *image);
> +extern int kexec_resume(struct kimage *image);
> +#else
> +static inline int kexec_hibernate(struct kimage *image) { return -ENOSYS; }
> +static inline int kexec_resume(struct kimage *image) { return -ENOSYS; }
> +#endif
> +
> #ifdef CONFIG_PM_SLEEP
> void save_processor_state(void);
> void restore_processor_state(void);
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -321,6 +321,34 @@ static int kernel_kexec(unsigned long cm
> return ret;
> }
>
> +static int kernel_kexec_hibernate(void)
> +{
> + int ret = -ENOSYS;
> +#if defined(CONFIG_KEXEC) && defined(CONFIG_HIBERNATION)
> + struct kimage *image;
> + image = xchg(&kexec_image, NULL);
> + if (!image)
> + return -EINVAL;
> + ret = kexec_hibernate(image);
> + kimage_free(image);
> +#endif
> + return ret;
> +}
> +
> +static int kernel_kexec_resume(void)
> +{
> + int ret = -ENOSYS;
> +#if defined(CONFIG_KEXEC) && defined(CONFIG_HIBERNATION)
> + struct kimage *image;
> + image = xchg(&kexec_image, NULL);
> + if (!image)
> + return -EINVAL;
> + ret = kexec_resume(image);
> + kimage_free(image);
> +#endif
> + return ret;
> +}
> +
> static void kernel_shutdown_prepare(enum system_states state)
> {
> blocking_notifier_call_chain(&reboot_notifier_list,
> @@ -442,6 +470,20 @@ asmlinkage long sys_reboot(int magic1, i
> }
> #endif
>
> + case LINUX_REBOOT_CMD_KEXEC_HIBERNATE:
> + {
> + int ret = kernel_kexec_hibernate();
> + unlock_kernel();
> + return ret;
> + }
> +
> + case LINUX_REBOOT_CMD_KEXEC_RESUME:
> + {
> + int ret = kernel_kexec_resume();
> + unlock_kernel();
> + return ret;
> + }
> +
> default:
> unlock_kernel();
> return -EINVAL;
> --- a/include/linux/reboot.h
> +++ b/include/linux/reboot.h
> @@ -33,6 +33,8 @@
> #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
> #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
> #define LINUX_REBOOT_CMD_KEXEC 0x45584543
> +#define LINUX_REBOOT_CMD_KEXEC_HIBERNATE 0xE4390DF3
> +#define LINUX_REBOOT_CMD_KEXEC_RESUME 0xF5A41C2E
>
>
> #ifdef __KERNEL__
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -129,7 +129,10 @@ extern int machine_kexec_jump(struct kim
> unsigned long cmd);
> extern int kexec_jump(struct kimage *image, unsigned long *cmd_ret,
> unsigned long cmd);
> -#define KJUMP_CMD_NONE 0
> +#define KJUMP_CMD_NONE 0x0
> +#define KJUMP_CMD_HIBERNATE_WRITE_IMAGE 0x6b630001
> +#define KJUMP_CMD_HIBERNATE_POWER_DOWN 0x6b630002
> +#define KJUMP_CMD_HIBERNATE_RESUME 0x6b630003
> #ifdef CONFIG_COMPAT
> extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
> unsigned long nr_segments,
>

2007-11-20 02:06:10

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > +#ifdef CONFIG_KEXEC
> > +static void kexec_hibernate_power_down(void)
> > +{
> > + switch (hibernation_mode) {
> > + case HIBERNATION_TEST:
> > + case HIBERNATION_TESTPROC:
> > + break;
> > + case HIBERNATION_REBOOT:
> > + machine_restart(NULL);
> > + break;
> > + case HIBERNATION_PLATFORM:
> > + if (!hibernation_ops)
> > + break;
> > + hibernation_ops->enter();
>
> hibernation_platform_enter() should be used here (as of the current mainline).

The power_down will be called with interrupt disabled, device suspended,
non-boot CPU disabled. But the latest hibernate_platform_enter calls the
device_suspend, disable_nonboot_cpus etc function. So, I use
hibernation_ops->enter() directly instead of
hibernation_platform_enter().

> > + /* We should never get here */
> > + while (1);
> > + break;
> > + case HIBERNATION_SHUTDOWN:
> > + machine_power_off();
> > + break;
> > + }
> > + machine_halt();
> > + /*
> > + * Valid image is on the disk, if we continue we risk serious data
> > + * corruption after resume.
> > + */
> > + printk(KERN_CRIT "Please power me down manually\n");
> > + while (1);
> > +}
>
> Hm, what's the difference between the above function and power_down(),
> actually?

Same as above.

> > +
> > +int kexec_hibernate(struct kimage *image)
> > +{
> > + int error;
> > + int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
> > + unsigned long cmd_ret;
> > +
> > + mutex_lock(&pm_mutex);
> > +
> > + pm_prepare_console();
> > + suspend_console();
> > +
> > + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
> > + if (error)
> > + goto Resume_console;
> > +
> > + error = platform_start(platform_mode);
> > + if (error)
> > + goto Resume_console;
> > +
> > + error = device_suspend(PMSG_FREEZE);
> > + if (error)
> > + goto Resume_console;
> > +
> > + error = platform_pre_snapshot(platform_mode);
> > + if (error)
> > + goto Resume_devices;
> > +
> > + error = disable_nonboot_cpus();
> > + if (error)
> > + goto Resume_devices;
>
> I wonder if it's viable to merge the above with hibernate() and
> hibernation_snapshot() somehow, to avoid code duplication?

Yes. Most code are duplicated. But there is one advantage not to merge
them: power_down can called with IRQ disabled to make it possible to
eliminate the freezer.

I think it is possible to merge the two implementation. I will try to do
it.

> Apart from the above, there's some new debug code to be added to disk.c
> in 2.6.25. It's in the ACPI test tree right now and you can get it as
> individual patches from:
> http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.24-rc2/patches/
> (patches 10-12).
>
> Please base your changes on top of that.

OK, I will do it.

Best Regards,
Huang Ying

2007-11-20 02:16:03

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Tuesday, 20 of November 2007, Huang, Ying wrote:
> On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > > +#ifdef CONFIG_KEXEC
> > > +static void kexec_hibernate_power_down(void)
> > > +{
> > > + switch (hibernation_mode) {
> > > + case HIBERNATION_TEST:
> > > + case HIBERNATION_TESTPROC:
> > > + break;
> > > + case HIBERNATION_REBOOT:
> > > + machine_restart(NULL);
> > > + break;
> > > + case HIBERNATION_PLATFORM:
> > > + if (!hibernation_ops)
> > > + break;
> > > + hibernation_ops->enter();
> >
> > hibernation_platform_enter() should be used here (as of the current mainline).
>
> The power_down will be called with interrupt disabled, device suspended,
> non-boot CPU disabled. But the latest hibernate_platform_enter calls the
> device_suspend, disable_nonboot_cpus etc function. So, I use
> hibernation_ops->enter() directly instead of
> hibernation_platform_enter().

Hm, you need to call device_power_down(PMSG_SUSPEND) before
hibernation_ops->enter().

Also, all of the ACPI global calls need to be carried out before that and the
devices should be suspended rather than shut down in that case.

That's why hibernation_platform_enter() has been introduced, BTW.

> > > + /* We should never get here */
> > > + while (1);
> > > + break;
> > > + case HIBERNATION_SHUTDOWN:
> > > + machine_power_off();
> > > + break;
> > > + }
> > > + machine_halt();
> > > + /*
> > > + * Valid image is on the disk, if we continue we risk serious data
> > > + * corruption after resume.
> > > + */
> > > + printk(KERN_CRIT "Please power me down manually\n");
> > > + while (1);
> > > +}
> >
> > Hm, what's the difference between the above function and power_down(),
> > actually?
>
> Same as above.

I see, but I'm not sure it's correct.

> > > +
> > > +int kexec_hibernate(struct kimage *image)
> > > +{
> > > + int error;
> > > + int platform_mode = (hibernation_mode == HIBERNATION_PLATFORM);
> > > + unsigned long cmd_ret;
> > > +
> > > + mutex_lock(&pm_mutex);
> > > +
> > > + pm_prepare_console();
> > > + suspend_console();
> > > +
> > > + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
> > > + if (error)
> > > + goto Resume_console;
> > > +
> > > + error = platform_start(platform_mode);
> > > + if (error)
> > > + goto Resume_console;
> > > +
> > > + error = device_suspend(PMSG_FREEZE);
> > > + if (error)
> > > + goto Resume_console;
> > > +
> > > + error = platform_pre_snapshot(platform_mode);
> > > + if (error)
> > > + goto Resume_devices;
> > > +
> > > + error = disable_nonboot_cpus();
> > > + if (error)
> > > + goto Resume_devices;
> >
> > I wonder if it's viable to merge the above with hibernate() and
> > hibernation_snapshot() somehow, to avoid code duplication?
>
> Yes. Most code are duplicated. But there is one advantage not to merge
> them: power_down can called with IRQ disabled to make it possible to
> eliminate the freezer.

Please leave the freezer alone for now. We'll clean up all that when we decide
to remove it.

> I think it is possible to merge the two implementation. I will try to do
> it.

Yes, please use as much of the existing code as reasonably possible, so that we
don't need to duplicate changes in the future.

> > Apart from the above, there's some new debug code to be added to disk.c
> > in 2.6.25. It's in the ACPI test tree right now and you can get it as
> > individual patches from:
> > http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.24-rc2/patches/
> > (patches 10-12).
> >
> > Please base your changes on top of that.
>
> OK, I will do it.

Thanks.

There will be some more cosmetic changes in this area, probably before 2.6.25,
so please stay tuned.

Greetings,
Rafael

2007-11-20 02:49:19

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Tue, 2007-11-20 at 03:24 +0100, Rafael J. Wysocki wrote:
> On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > > > +#ifdef CONFIG_KEXEC
> > > > +static void kexec_hibernate_power_down(void)
> > > > +{
> > > > + switch (hibernation_mode) {
> > > > + case HIBERNATION_TEST:
> > > > + case HIBERNATION_TESTPROC:
> > > > + break;
> > > > + case HIBERNATION_REBOOT:
> > > > + machine_restart(NULL);
> > > > + break;
> > > > + case HIBERNATION_PLATFORM:
> > > > + if (!hibernation_ops)
> > > > + break;
> > > > + hibernation_ops->enter();
> > >
> > > hibernation_platform_enter() should be used here (as of the current mainline).
> >
> > The power_down will be called with interrupt disabled, device suspended,
> > non-boot CPU disabled. But the latest hibernate_platform_enter calls the
> > device_suspend, disable_nonboot_cpus etc function. So, I use
> > hibernation_ops->enter() directly instead of
> > hibernation_platform_enter().
>
> Hm, you need to call device_power_down(PMSG_SUSPEND) before
> hibernation_ops->enter().
>
> Also, all of the ACPI global calls need to be carried out before that and the
> devices should be suspended rather than shut down in that case.
>
> That's why hibernation_platform_enter() has been introduced, BTW.

Situation is a little different between u/swsusp and khiberantion.

u/swsusp:

platform_start();
suspend console();
device_suspend(PMSG_FREEZE);
platform_pre_snapshot();
disable_nonboot_cpus();
local_irq_disable();
device_power_down(PMSG_FREEZE);
/* create snapshot */
device_power_up();
local_irq_enable();
enable_nonboot_cpus();
platform_finish();
device_resume();
resume_console();
/* write the image out */
hibernation_ops->start();
suspend_console();
device_suspend(PMSG_SUSPEND);
hibernation_ops->prepare();
disable_nonboot_cpus();
local_irq_disable();
device_power_down(PMSG_SUSPEND);
hibernation_ops->enter();

khibernation:

suspend_console();
platform_start();
device_suspend(PMSG_FREEZE);
platform_pre_snapshot();
disable_nonboot_cpus();
local_irq_disable();
device_power_down(PMSG_FREEZE);
/* jump to kexeced (hibernating) kernel */
/* in kexeced kernel */
device_power_up();
local_irq_eanble();
enable_nonboot_cpus();
device_resume();
resume_console();
/* write the image */
suspend_console();
device_suspend(PMSG_FREEZE);
disable_nonboot_cpus();
local_irq_disable();
device_power_down(PMSG_FREEZE);
/* jump to original (hibernated) kernel */
/* in original kernel */
hibernation_ops->enter();

The difference is:

- In u/swsusp, ACPI methods are executed twice, before writing out the
image and after writing out the image.
- After writing out the image, the PMSG_SUSPEND is used instead of
PMSG_FREEZE.

Some questions:

- What is the difference between PMSG_SUSPEND and PMSG_FREEZE?
- The ACPI methods should be executed once or twice? According to ACPI
specification?

Best Regards,
Huang Ying

2007-11-20 14:58:22

by Alan Stern

[permalink] [raw]
Subject: Re: [linux-pm] Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Tue, 20 Nov 2007, Huang, Ying wrote:

> - What is the difference between PMSG_SUSPEND and PMSG_FREEZE?

SUSPEND means that the system is about to go into a low-power state, so
the driver should take the appropriate action to reduce the device's
power consumption. It should also stop all I/O and DMA to the device.
Interrupts can remain enabled if the device is supposed to be a wakeup
source; otherwise they should be disabled.

FREEZE means that the system is going to hibernate, and devices need to
be quiescent (no I/O, no DMA, and no interrupts) so that an atomic
memory snapshot can be captured. The driver should take the
appropriate action to quiesce the device but the power level doesn't
need to change.

PRETHAW means that the system is going to resume from hibernation by
loading a previously-saved memory snapshot. The driver should take the
appropriate action to quiesce the device (no I/O, no DMA, and no
interrupts) so that the snapshot can be safely restored, but the power
level doesn't need to change. The driver may also want to put the
device into a special state so that the saved kernel's resume method
will recognize the device has undergone a hibernation cycle and needs
to be reinitialized.

Alan Stern

2007-11-20 23:42:51

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Tuesday, 20 of November 2007, Huang, Ying wrote:
> On Tue, 2007-11-20 at 03:24 +0100, Rafael J. Wysocki wrote:
> > On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > > On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > > > > +#ifdef CONFIG_KEXEC
> > > > > +static void kexec_hibernate_power_down(void)
> > > > > +{
> > > > > + switch (hibernation_mode) {
> > > > > + case HIBERNATION_TEST:
> > > > > + case HIBERNATION_TESTPROC:
> > > > > + break;
> > > > > + case HIBERNATION_REBOOT:
> > > > > + machine_restart(NULL);
> > > > > + break;
> > > > > + case HIBERNATION_PLATFORM:
> > > > > + if (!hibernation_ops)
> > > > > + break;
> > > > > + hibernation_ops->enter();
> > > >
> > > > hibernation_platform_enter() should be used here (as of the current mainline).
> > >
> > > The power_down will be called with interrupt disabled, device suspended,
> > > non-boot CPU disabled. But the latest hibernate_platform_enter calls the
> > > device_suspend, disable_nonboot_cpus etc function. So, I use
> > > hibernation_ops->enter() directly instead of
> > > hibernation_platform_enter().
> >
> > Hm, you need to call device_power_down(PMSG_SUSPEND) before
> > hibernation_ops->enter().
> >
> > Also, all of the ACPI global calls need to be carried out before that and the
> > devices should be suspended rather than shut down in that case.
> >
> > That's why hibernation_platform_enter() has been introduced, BTW.
>
> Situation is a little different between u/swsusp and khiberantion.
>
> u/swsusp:
>
> platform_start();
> suspend console();
> device_suspend(PMSG_FREEZE);
> platform_pre_snapshot();
> disable_nonboot_cpus();
> local_irq_disable();
> device_power_down(PMSG_FREEZE);
> /* create snapshot */
> device_power_up();
> local_irq_enable();
> enable_nonboot_cpus();
> platform_finish();
> device_resume();
> resume_console();
> /* write the image out */
> hibernation_ops->start();
> suspend_console();
> device_suspend(PMSG_SUSPEND);
> hibernation_ops->prepare();
> disable_nonboot_cpus();
> local_irq_disable();
> device_power_down(PMSG_SUSPEND);
> hibernation_ops->enter();
>
> khibernation:
>
> suspend_console();
> platform_start();
> device_suspend(PMSG_FREEZE);
> platform_pre_snapshot();
> disable_nonboot_cpus();
> local_irq_disable();
> device_power_down(PMSG_FREEZE);
> /* jump to kexeced (hibernating) kernel */
> /* in kexeced kernel */
> device_power_up();
> local_irq_eanble();
> enable_nonboot_cpus();

You should call platform_finish() here, or device_resume() will not work
appropriately on some systems.

However, after platform_finish() has been executed, the ACPI firmware will
assume that the hibernation has been canceled, so you need to tell it that
you'd like to go into the low power state after all.

> device_resume();
> resume_console();
> /* write the image */

For this reason, you have to call hibernation_ops->start() once again
and the other functions like in the swsusp case, in that order.

> suspend_console();
> device_suspend(PMSG_FREEZE);
> disable_nonboot_cpus();
> local_irq_disable();
> device_power_down(PMSG_FREEZE);
> /* jump to original (hibernated) kernel */

This looks too fragile to my eyes.

Why don't you call hibernation_ops->enter() directly from the "kexeced" kernel?

Rafael

2007-11-20 23:43:17

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [linux-pm] Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Tuesday, 20 of November 2007, Alan Stern wrote:
> On Tue, 20 Nov 2007, Huang, Ying wrote:
>
> > - What is the difference between PMSG_SUSPEND and PMSG_FREEZE?
>
> SUSPEND means that the system is about to go into a low-power state, so
> the driver should take the appropriate action to reduce the device's
> power consumption. It should also stop all I/O and DMA to the device.
> Interrupts can remain enabled if the device is supposed to be a wakeup
> source; otherwise they should be disabled.
>
> FREEZE means that the system is going to hibernate, and devices need to
> be quiescent (no I/O, no DMA, and no interrupts) so that an atomic
> memory snapshot can be captured. The driver should take the
> appropriate action to quiesce the device but the power level doesn't
> need to change.
>
> PRETHAW means that the system is going to resume from hibernation by
> loading a previously-saved memory snapshot. The driver should take the
> appropriate action to quiesce the device (no I/O, no DMA, and no
> interrupts) so that the snapshot can be safely restored, but the power
> level doesn't need to change. The driver may also want to put the
> device into a special state so that the saved kernel's resume method
> will recognize the device has undergone a hibernation cycle and needs
> to be reinitialized.

Exactly.

For this reason, PMSG_SUSPEND should be used right before calling
hibernation_ops->enter() .

Greetings,
Rafael

2007-11-21 08:02:05

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Wed, 2007-11-21 at 01:00 +0100, Rafael J. Wysocki wrote:
> On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > On Tue, 2007-11-20 at 03:24 +0100, Rafael J. Wysocki wrote:
> > > On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > > > On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > > > > > +#ifdef CONFIG_KEXEC
> > > > > > +static void kexec_hibernate_power_down(void)
> > > > > > +{
> > > > > > + switch (hibernation_mode) {
> > > > > > + case HIBERNATION_TEST:
> > > > > > + case HIBERNATION_TESTPROC:
> > > > > > + break;
> > > > > > + case HIBERNATION_REBOOT:
> > > > > > + machine_restart(NULL);
> > > > > > + break;
> > > > > > + case HIBERNATION_PLATFORM:
> > > > > > + if (!hibernation_ops)
> > > > > > + break;
> > > > > > + hibernation_ops->enter();
> > > > >
> > > > > hibernation_platform_enter() should be used here (as of the current mainline).
> > > >
> > > > The power_down will be called with interrupt disabled, device suspended,
> > > > non-boot CPU disabled. But the latest hibernate_platform_enter calls the
> > > > device_suspend, disable_nonboot_cpus etc function. So, I use
> > > > hibernation_ops->enter() directly instead of
> > > > hibernation_platform_enter().
> > >
> > > Hm, you need to call device_power_down(PMSG_SUSPEND) before
> > > hibernation_ops->enter().
> > >
> > > Also, all of the ACPI global calls need to be carried out before that and the
> > > devices should be suspended rather than shut down in that case.
> > >
> > > That's why hibernation_platform_enter() has been introduced, BTW.
> >
> > Situation is a little different between u/swsusp and khiberantion.
> >
> > u/swsusp:
> >
> > platform_start();
> > suspend console();
> > device_suspend(PMSG_FREEZE);
> > platform_pre_snapshot();
> > disable_nonboot_cpus();
> > local_irq_disable();
> > device_power_down(PMSG_FREEZE);
> > /* create snapshot */
> > device_power_up();
> > local_irq_enable();
> > enable_nonboot_cpus();
> > platform_finish();
> > device_resume();
> > resume_console();
> > /* write the image out */
> > hibernation_ops->start();
> > suspend_console();
> > device_suspend(PMSG_SUSPEND);
> > hibernation_ops->prepare();
> > disable_nonboot_cpus();
> > local_irq_disable();
> > device_power_down(PMSG_SUSPEND);
> > hibernation_ops->enter();
> >
> > khibernation:
> >
> > suspend_console();
> > platform_start();
> > device_suspend(PMSG_FREEZE);
> > platform_pre_snapshot();
> > disable_nonboot_cpus();
> > local_irq_disable();
> > device_power_down(PMSG_FREEZE);
> > /* jump to kexeced (hibernating) kernel */
> > /* in kexeced kernel */
> > device_power_up();
> > local_irq_eanble();
> > enable_nonboot_cpus();
>
> You should call platform_finish() here, or device_resume() will not work
> appropriately on some systems.
>
> However, after platform_finish() has been executed, the ACPI firmware will
> assume that the hibernation has been canceled, so you need to tell it that
> you'd like to go into the low power state after all.
>
> > device_resume();
> > resume_console();
> > /* write the image */
>
> For this reason, you have to call hibernation_ops->start() once again
> and the other functions like in the swsusp case, in that order.
>
> > suspend_console();
> > device_suspend(PMSG_FREEZE);
> > disable_nonboot_cpus();
> > local_irq_disable();
> > device_power_down(PMSG_FREEZE);
> > /* jump to original (hibernated) kernel */
>
> This looks too fragile to my eyes.
>
> Why don't you call hibernation_ops->enter() directly from the "kexeced" kernel?

I don't know whether there are ACPI global state inside Linux kernel. So
I restrict all ACPI method calling in original kernel. If the ACPI
global state in Linux kernel is not a issue, I can call
hibernation_ops->enter() directly in the "kexeced" kernel.

Best Regards,
Huang Ying

2007-11-21 19:51:57

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 3/3 -mm] kexec based hibernation -v6: kexec hibernate/resume

On Wednesday, 21 of November 2007, Huang, Ying wrote:
> On Wed, 2007-11-21 at 01:00 +0100, Rafael J. Wysocki wrote:
> > On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > > On Tue, 2007-11-20 at 03:24 +0100, Rafael J. Wysocki wrote:
> > > > On Tuesday, 20 of November 2007, Huang, Ying wrote:
> > > > > On Mon, 2007-11-19 at 19:22 +0100, Rafael J. Wysocki wrote:
> > > > > > > +#ifdef CONFIG_KEXEC
> > > > > > > +static void kexec_hibernate_power_down(void)
> > > > > > > +{
> > > > > > > + switch (hibernation_mode) {
> > > > > > > + case HIBERNATION_TEST:
> > > > > > > + case HIBERNATION_TESTPROC:
> > > > > > > + break;
> > > > > > > + case HIBERNATION_REBOOT:
> > > > > > > + machine_restart(NULL);
> > > > > > > + break;
> > > > > > > + case HIBERNATION_PLATFORM:
> > > > > > > + if (!hibernation_ops)
> > > > > > > + break;
> > > > > > > + hibernation_ops->enter();
> > > > > >
> > > > > > hibernation_platform_enter() should be used here (as of the current mainline).
> > > > >
> > > > > The power_down will be called with interrupt disabled, device suspended,
> > > > > non-boot CPU disabled. But the latest hibernate_platform_enter calls the
> > > > > device_suspend, disable_nonboot_cpus etc function. So, I use
> > > > > hibernation_ops->enter() directly instead of
> > > > > hibernation_platform_enter().
> > > >
> > > > Hm, you need to call device_power_down(PMSG_SUSPEND) before
> > > > hibernation_ops->enter().
> > > >
> > > > Also, all of the ACPI global calls need to be carried out before that and the
> > > > devices should be suspended rather than shut down in that case.
> > > >
> > > > That's why hibernation_platform_enter() has been introduced, BTW.
> > >
> > > Situation is a little different between u/swsusp and khiberantion.
> > >
> > > u/swsusp:
> > >
> > > platform_start();
> > > suspend console();
> > > device_suspend(PMSG_FREEZE);
> > > platform_pre_snapshot();
> > > disable_nonboot_cpus();
> > > local_irq_disable();
> > > device_power_down(PMSG_FREEZE);
> > > /* create snapshot */
> > > device_power_up();
> > > local_irq_enable();
> > > enable_nonboot_cpus();
> > > platform_finish();
> > > device_resume();
> > > resume_console();
> > > /* write the image out */
> > > hibernation_ops->start();
> > > suspend_console();
> > > device_suspend(PMSG_SUSPEND);
> > > hibernation_ops->prepare();
> > > disable_nonboot_cpus();
> > > local_irq_disable();
> > > device_power_down(PMSG_SUSPEND);
> > > hibernation_ops->enter();
> > >
> > > khibernation:
> > >
> > > suspend_console();
> > > platform_start();
> > > device_suspend(PMSG_FREEZE);
> > > platform_pre_snapshot();
> > > disable_nonboot_cpus();
> > > local_irq_disable();
> > > device_power_down(PMSG_FREEZE);
> > > /* jump to kexeced (hibernating) kernel */
> > > /* in kexeced kernel */
> > > device_power_up();
> > > local_irq_eanble();
> > > enable_nonboot_cpus();
> >
> > You should call platform_finish() here, or device_resume() will not work
> > appropriately on some systems.
> >
> > However, after platform_finish() has been executed, the ACPI firmware will
> > assume that the hibernation has been canceled, so you need to tell it that
> > you'd like to go into the low power state after all.
> >
> > > device_resume();
> > > resume_console();
> > > /* write the image */
> >
> > For this reason, you have to call hibernation_ops->start() once again
> > and the other functions like in the swsusp case, in that order.
> >
> > > suspend_console();
> > > device_suspend(PMSG_FREEZE);
> > > disable_nonboot_cpus();
> > > local_irq_disable();
> > > device_power_down(PMSG_FREEZE);
> > > /* jump to original (hibernated) kernel */
> >
> > This looks too fragile to my eyes.
> >
> > Why don't you call hibernation_ops->enter() directly from the "kexeced" kernel?
>
> I don't know whether there are ACPI global state inside Linux kernel. So
> I restrict all ACPI method calling in original kernel. If the ACPI
> global state in Linux kernel is not a issue, I can call
> hibernation_ops->enter() directly in the "kexeced" kernel.

Well, I can't say to what extent the Linux ACPI tracks the global state.
Still, even if it does, you don't need to jump back to the hibernated kernel
just to power off the system.

Actually, if you really want to include ACPI into the picture, which I'm not
sure is the best idea at this point, you should use all of the appropriate
ACPI hooks in the kexeced kernel and you shouldn't get back to the hibernated
kernel after you've saved the image.

Regards,
Rafael