Hi,
The first of the following three patches introduces notifier chaing that can be
used by subsystems to prefore some hibernation-related or suspend-related
operations independent of device drivers' .suspend() and .resume() callbacks.
The next two patches use this mechanism for disabling/enabling the usermode
helper and firmware requesting functionality before/after (respectively) the
hibernation or suspend.
Comments welcome.
Greetings,
Rafael
From: Rafael J. Wysocki <[email protected]>
Use a hibernation and suspend notifier to disable the user mode helper before
a hibernation/suspend and enable it after the operation.
Signed-off-by: Rafael J. Wysocki <[email protected]>
kernel/kmod.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
Index: linux-2.6.22-rc3/kernel/kmod.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/kmod.c 2007-05-27 18:56:44.000000000 +0200
+++ linux-2.6.22-rc3/kernel/kmod.c 2007-05-27 19:43:35.000000000 +0200
@@ -33,6 +33,8 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/resource.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
#include <asm/uaccess.h>
extern int max_threads;
@@ -46,6 +48,14 @@ static struct workqueue_struct *khelper_
*/
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
+/*
+ * If set, both call_usermodehelper_keys() and call_usermodehelper_pipe() exit
+ * immediately returning -EBUSY. Used for preventing user land processes from
+ * being created after the user land has been frozen during a system-wide
+ * hibernation or suspend operation.
+ */
+static int usermodehelper_disabled;
+
/**
* request_module - try to load a kernel module
* @fmt: printf style format string for the name of the module
@@ -251,6 +261,22 @@ static void __call_usermodehelper(struct
complete(sub_info->complete);
}
+static int usermodehelper_pm_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *ignored)
+{
+ switch (action) {
+ case PM_PRE_FREEZE:
+ usermodehelper_disabled = 1;
+ return NOTIFY_OK;
+ case PM_POST_THAW:
+ usermodehelper_disabled = 0;
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
/**
* call_usermodehelper_keys - start a usermode application
* @path: pathname for the application
@@ -276,7 +302,7 @@ int call_usermodehelper_keys(char *path,
struct subprocess_info *sub_info;
int retval;
- if (!khelper_wq)
+ if (!khelper_wq || usermodehelper_disabled)
return -EBUSY;
if (path[0] == '\0')
@@ -319,7 +345,7 @@ int call_usermodehelper_pipe(char *path,
};
struct file *f;
- if (!khelper_wq)
+ if (!khelper_wq || usermodehelper_disabled)
return -EBUSY;
if (path[0] == '\0')
@@ -347,4 +373,5 @@ void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
BUG_ON(!khelper_wq);
+ pm_notifier(usermodehelper_pm_callback, 0);
}
From: Rafael J. Wysocki <[email protected]>
Make it possible to register hibernation and suspend notifiers, so that
subsystems can perform hibernation-related or suspend-related operations that
should not be carried out by device drivers' .suspend() and .resume() routines.
Signed-off-by: Rafael J. Wysocki <[email protected]>
Documentation/power/notifiers.txt | 76 ++++++++++++++++++++++++++++++++++++++
include/linux/notifier.h | 13 ++++++
include/linux/suspend.h | 25 +++++++++++-
kernel/power/Makefile | 2 -
kernel/power/disk.c | 31 +++++++++++++--
kernel/power/main.c | 12 ++++++
kernel/power/notifier.c | 51 +++++++++++++++++++++++++
kernel/power/power.h | 4 ++
kernel/power/user.c | 11 ++++-
9 files changed, 214 insertions(+), 11 deletions(-)
Index: linux-2.6.22-rc3/kernel/power/notifier.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22-rc3/kernel/power/notifier.c 2007-05-27 14:43:14.000000000 +0200
@@ -0,0 +1,51 @@
+/*
+ * linux/kernel/power/notifier.c
+ *
+ * This file contains functions used for registering and calling hibernation and
+ * suspend (PM) notifiers that can be used by subsystems for carrying out some
+ * special hibernation-related and/or suspend-related operations.
+ *
+ * Copyright (C) 2007 Rafael J. Wysocki <[email protected]>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+
+static DEFINE_MUTEX(pm_notifier_lock);
+
+static RAW_NOTIFIER_HEAD(pm_chain);
+
+int register_pm_notifier(struct notifier_block *nb)
+{
+ int ret;
+ mutex_lock(&pm_notifier_lock);
+ ret = raw_notifier_chain_register(&pm_chain, nb);
+ mutex_unlock(&pm_notifier_lock);
+ return ret;
+}
+EXPORT_SYMBOL(register_pm_notifier);
+
+void unregister_pm_notifier(struct notifier_block *nb)
+{
+ mutex_lock(&pm_notifier_lock);
+ raw_notifier_chain_unregister(&pm_chain, nb);
+ mutex_unlock(&pm_notifier_lock);
+}
+EXPORT_SYMBOL(unregister_pm_notifier);
+
+int pm_notifier_call_chain(unsigned long val)
+{
+ int error = 0;
+
+ mutex_lock(&pm_notifier_lock);
+ if (raw_notifier_call_chain(&pm_chain, val, NULL) == NOTIFY_BAD)
+ error = -EINVAL;
+
+ mutex_unlock(&pm_notifier_lock);
+ return error;
+}
Index: linux-2.6.22-rc3/kernel/power/Makefile
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/Makefile 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/Makefile 2007-05-27 14:43:14.000000000 +0200
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
-obj-y := main.o process.o console.o
+obj-y := main.o process.o console.o notifier.o
obj-$(CONFIG_PM_LEGACY) += pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
Index: linux-2.6.22-rc3/include/linux/suspend.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/suspend.h 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/suspend.h 2007-05-27 14:43:14.000000000 +0200
@@ -54,7 +54,8 @@ struct hibernation_ops {
void (*restore_cleanup)(void);
};
-#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_PM
+#ifdef CONFIG_SOFTWARE_SUSPEND
/* kernel/power/snapshot.c */
extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
static inline void register_nosave_region(unsigned long b, unsigned long e)
@@ -72,7 +73,7 @@ extern unsigned long get_safe_page(gfp_t
extern void hibernation_set_ops(struct hibernation_ops *ops);
extern int hibernate(void);
-#else
+#else /* CONFIG_SOFTWARE_SUSPEND */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -81,7 +82,7 @@ static inline void swsusp_unset_page_fre
static inline void hibernation_set_ops(struct hibernation_ops *ops) {}
static inline int hibernate(void) { return -ENOSYS; }
-#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
+#endif /* CONFIG_SOFTWARE_SUSPEND */
void save_processor_state(void);
void restore_processor_state(void);
@@ -89,4 +90,22 @@ struct saved_context;
void __save_processor_state(struct saved_context *ctxt);
void __restore_processor_state(struct saved_context *ctxt);
+int register_pm_notifier(struct notifier_block *nb);
+void unregister_pm_notifier(struct notifier_block *nb);
+
+#define pm_notifier(fn, pri) { \
+ static struct notifier_block fn##_nb = \
+ { .notifier_call = fn, .priority = pri }; \
+ register_pm_notifier(&fn##_nb); \
+}
+#else /* CONFIG_PM */
+static inline int register_pm_notifier(struct notifier_block *nb) {
+ return 0;
+}
+static inline void unregister_pm_notifier(struct notifier_block *nb) {
+}
+
+#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+#endif /* CONFIG_PM */
+
#endif /* _LINUX_SWSUSP_H */
Index: linux-2.6.22-rc3/kernel/power/power.h
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/power.h 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/power.h 2007-05-27 14:43:14.000000000 +0200
@@ -173,5 +173,9 @@ extern void swsusp_close(void);
extern int suspend_enter(suspend_state_t state);
struct timeval;
+/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
unsigned int, char *);
+
+/* kernel/power/notifier.c */
+extern int pm_notifier_call_chain(unsigned long val);
Index: linux-2.6.22-rc3/include/linux/notifier.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/notifier.h 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/notifier.h 2007-05-27 14:43:14.000000000 +0200
@@ -209,5 +209,18 @@ extern int __srcu_notifier_call_chain(st
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+/* Hibernation and suspend events */
+#define PM_PRE_FREEZE 0x0001 /* Going to freeze tasks */
+#define PM_POST_THAW 0x0002 /* Tasks have just been thawed */
+#define PM_HIBERNATION_PREPARE 0x0003 /* Tasks frozen, going to hibernate */
+#define PM_SNAPSHOT_FAILED 0x0004 /* Couldn't create hibernation image */
+#define PM_POST_SNAPSHOT 0x0005 /* Image created, going to save it */
+#define PM_RESTORE_PREPARE 0x0006 /* Going to restore snapshotted system */
+#define PM_RESTORE_FAILED 0x0007 /* Couldn't restore snapshotted system */
+#define PM_POST_RESTORE 0x0008 /* Restore complete, thawing tasks */
+#define PM_SUSPEND_PREPARE 0x0009 /* Tasks frozen, going to suspend */
+#define PM_SUSPEND_FAILED 0x000A /* Couldn't suspend */
+#define PM_POST_RESUME 0x000B /* Resume successful, thawing tasks */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
Index: linux-2.6.22-rc3/kernel/power/disk.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/disk.c 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/disk.c 2007-05-27 15:42:54.000000000 +0200
@@ -130,6 +130,10 @@ int hibernation_snapshot(int platform_mo
{
int error;
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ return error;
+
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (error)
@@ -161,6 +165,12 @@ int hibernation_snapshot(int platform_mo
device_resume();
Resume_console:
resume_console();
+ if (error)
+ pm_notifier_call_chain(PM_SNAPSHOT_FAILED);
+ else
+ pm_notifier_call_chain(in_suspend ?
+ PM_POST_SNAPSHOT : PM_POST_RESTORE);
+
return error;
}
@@ -177,6 +187,10 @@ int hibernation_restore(int platform_mod
{
int error;
+ error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (error)
+ return error;
+
pm_prepare_console();
suspend_console();
error = device_suspend(PMSG_PRETHAW);
@@ -195,6 +209,7 @@ int hibernation_restore(int platform_mod
Finish:
resume_console();
pm_restore_console();
+ pm_notifier_call_chain(PM_RESTORE_FAILED);
return error;
}
@@ -259,12 +274,17 @@ static void unprepare_processes(void)
{
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
}
static int prepare_processes(void)
{
int error = 0;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (error)
+ return error;
+
pm_prepare_console();
if (freeze_processes()) {
error = -EBUSY;
@@ -281,9 +301,12 @@ int hibernate(void)
{
int error;
+ mutex_lock(&pm_mutex);
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0))
- return -EBUSY;
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
@@ -294,7 +317,6 @@ int hibernate(void)
if (error)
goto Finish;
- mutex_lock(&pm_mutex);
if (hibernation_mode == HIBERNATION_TESTPROC) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
@@ -316,12 +338,13 @@ int hibernate(void)
swsusp_free();
}
Thaw:
- mutex_unlock(&pm_mutex);
unprepare_processes();
Finish:
free_basic_memory_bitmaps();
Exit:
atomic_inc(&snapshot_device_available);
+ Unlock:
+ mutex_unlock(&pm_mutex);
return error;
}
Index: linux-2.6.22-rc3/kernel/power/main.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/main.c 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/main.c 2007-05-27 15:05:51.000000000 +0200
@@ -82,6 +82,10 @@ static int suspend_prepare(suspend_state
if (!pm_ops || !pm_ops->enter)
return -EPERM;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (error)
+ return error;
+
pm_prepare_console();
if (freeze_processes()) {
@@ -89,6 +93,10 @@ static int suspend_prepare(suspend_state
goto Thaw;
}
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (error)
+ return error;
+
if ((free_pages = global_page_state(NR_FREE_PAGES))
< FREE_PAGE_NUMBER) {
pr_debug("PM: free some memory\n");
@@ -121,9 +129,11 @@ static int suspend_prepare(suspend_state
device_resume();
Resume_console:
resume_console();
+ pm_notifier_call_chain(PM_SUSPEND_FAILED);
Thaw:
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
return error;
}
@@ -173,8 +183,10 @@ static void suspend_finish(suspend_state
pm_finish(state);
device_resume();
resume_console();
+ pm_notifier_call_chain(PM_POST_RESUME);
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
}
Index: linux-2.6.22-rc3/kernel/power/user.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/user.c 2007-05-27 14:41:59.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/user.c 2007-05-27 15:53:03.000000000 +0200
@@ -149,9 +149,13 @@ static int snapshot_ioctl(struct inode *
if (data->frozen)
break;
mutex_lock(&pm_mutex);
- if (freeze_processes()) {
- thaw_processes();
- error = -EBUSY;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (!error) {
+ error = freeze_processes();
+ if (error) {
+ thaw_processes();
+ pm_notifier_call_chain(PM_POST_THAW);
+ }
}
mutex_unlock(&pm_mutex);
if (!error)
@@ -163,6 +167,7 @@ static int snapshot_ioctl(struct inode *
break;
mutex_lock(&pm_mutex);
thaw_processes();
+ pm_notifier_call_chain(PM_POST_THAW);
mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
Index: linux-2.6.22-rc3/Documentation/power/notifiers.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22-rc3/Documentation/power/notifiers.txt 2007-05-27 18:20:48.000000000 +0200
@@ -0,0 +1,76 @@
+Suspend notifiers
+ (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
+
+There are some operations that device drivers may want to carry out in their
+.suspend() routines, but shouldn't, because they can cause the hibernation or
+suspend to fail. For example, a driver may want to allocate a substantial amount
+of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
+swsusp's memory shrinker has run.
+
+Also, there may be some operations, that subsystems want to carry out before a
+hibernation/suspend or after a restore/resume, requiring the system to be fully
+functional, so the drivers' .suspend() and .resume() routines are not suitable
+for this purpose. For example, device drivers may want to upload firmware to
+their devices after a restore from a hibernation image, but they cannot do it by
+calling request_firmware() from their .resume() routines (user land processes
+are frozen at this point). The solution may be to load the firmware into
+memory before processes are frozen and upload it from there in the .resume()
+routine. Of course, a hibernation notifier may be used for this purpose.
+
+The subsystems that have such needs can register suspend notifiers that will be
+called upon the following events by the suspend core:
+
+PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
+ be frozen immediately
+
+PM_POST_THAW Tasks have just been thawed after a resume or restore
+ from a hibernation image
+
+PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
+ been frozen, memory is going to be freed and devices
+ are going to be suspended.
+
+PM_SNAPSHOT_FAILED The creation of hibernation image has failed. Tasks
+ will be thawed immediately.
+
+PM_POST_SNAPSHOT Hibernation image has been created and it is going to
+ be saved immediately. Device drivers' .resume()
+ callbacks have been executed.
+
+PM_RESTORE_PREPARE The system is preparing for the restoration from a
+ hibernation image. Tasks have been frozen, devices
+ are going to be prepared for the restore.
+
+PM_RESTORE_FAILED The restore operation has failed. Tasks will be thawed
+ immediately and the system boot will continue.
+
+PM_POST_RESTORE The system memory state has been restored from a
+ hibernation image. Device drivers' .resume()
+ callbacks have been executed, tasks will be thawed
+ immediately.
+
+PM_SUSPEND_PREPARE The system is preparing for a suspend. Tasks have been
+ frozen, devices are going to be suspended.
+
+PM_SUSPEND_FAILED The suspend transition failed. Tasks will be thawed
+ immediately.
+
+PM_POST_RESUME The system has resumed. Tasks will be thawed
+ immediately.
+
+It is generally assumed that whatever the notifiers do for PM_PRE_FREEZE,
+should be undone for PM_POST_THAW. Moreover, what is done for
+PM_HIBERNATION_PREPARE, should be undone for PM_SNAPSHOT_FAILED,
+PM_POST_SNAPSHOT and PM_POST_RESTORE (eg. if memory is allocated for
+PM_HIBERNATION_PREPARE, it should be freed for PM_SNAPSHOT_FAILED,
+PM_POST_SNAPSHOT and PM_POST_RESTORE). Analogously, operations performed for
+PM_RESTORE_PREPARE should be reversed for PM_RESTORE_FAILED and operations
+performed for PM_SUSPEND_PREPARE should be reversed for PM_SUSPEND_FAILED and
+PM_POST_RESUME.
+
+The hibernation and suspend notifiers are called with pm_mutex held. They are
+defined in the usual way, but their last argument is meaningless (it is always
+NULL). To register and/or unregister a suspend notifier use the functions
+register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
+kernel/power/notifier.c . If you don't need to unregister the notifier, you can
+also use the pm_notifier() macro defined in include/linux/suspend.h .
From: Rafael J. Wysocki <[email protected]>
Use a hibernation and suspend notifier to disable the firmware requesting
mechanism before a hibernation/suspend and enable it after the operation.
Signed-off-by: Rafael J. Wysocki <[email protected]>
drivers/base/firmware_class.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
Index: linux-2.6.22-rc3/drivers/base/firmware_class.c
===================================================================
--- linux-2.6.22-rc3.orig/drivers/base/firmware_class.c 2007-05-27 19:43:03.000000000 +0200
+++ linux-2.6.22-rc3/drivers/base/firmware_class.c 2007-05-27 19:44:06.000000000 +0200
@@ -17,6 +17,8 @@
#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/kthread.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
#include <linux/firmware.h>
#include "base.h"
@@ -35,6 +37,9 @@ enum {
static int loading_timeout = 60; /* In seconds */
+/* If set, _request_firmware() will exit immediately returning -EBUSY */
+static int requesting_firmware_disabled;
+
/* fw_lock could be moved to 'struct firmware_priv' but since it is just
* guarding for corner cases a global lock should be OK */
static DEFINE_MUTEX(fw_lock);
@@ -397,6 +402,9 @@ _request_firmware(const struct firmware
struct firmware *firmware;
int retval;
+ if (requesting_firmware_disabled)
+ return -EBUSY;
+
if (!firmware_p)
return -EINVAL;
@@ -568,6 +576,26 @@ request_firmware_nowait(
return 0;
}
+static int firmware_class_pm_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *ignored)
+{
+ switch (action) {
+ case PM_PRE_FREEZE:
+ requesting_firmware_disabled = 1;
+ return NOTIFY_OK;
+ case PM_POST_THAW:
+ requesting_firmware_disabled = 0;
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block firmware_class_pm_nb = {
+ .notifier_call = firmware_class_pm_callback,
+};
+
static int __init
firmware_class_init(void)
{
@@ -582,6 +610,13 @@ firmware_class_init(void)
printk(KERN_ERR "%s: class_create_file failed\n",
__FUNCTION__);
class_unregister(&firmware_class);
+ return error;
+ }
+ error = register_pm_notifier(&firmware_class_pm_nb);
+ if (error) {
+ printk(KERN_ERR "%s: register_pm_notifier failed\n",
+ __FUNCTION__);
+ class_unregister(&firmware_class);
}
return error;
@@ -589,6 +624,7 @@ firmware_class_init(void)
static void __exit
firmware_class_exit(void)
{
+ unregister_pm_notifier(&firmware_class_pm_nb);
class_unregister(&firmware_class);
}
On 27 May 2007, at 21:31, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> Use a hibernation and suspend notifier to disable the firmware
> requesting
> mechanism before a hibernation/suspend and enable it after the
> operation.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
>
> drivers/base/firmware_class.c | 36 ++++++++++++++++++++++++++++++
> ++++++
> 1 file changed, 36 insertions(+)
I don't like this approach, as I feel that the firmware loading
interface should be able to detect if a firmware load request is not
being handled, due to absence of userspace / hotplug handler presence.
Other circumstances in which this can be a problem is during bootup
when request_firmware() calls can be made before userspace is up and
init has run (even in the presence of an initramfs).
(Slightly OT: A particularly nasty race is when an initramfs
userspace is present, but firmware loading cannot occur because init
has not run, so proc hasn't been mounted, so a hotplug event handler
cannot be registered, despite the fact that the firmware is sitting
on the ramdisk mounted correctly...)
In short, a more general solution would be preferred, and preferably
one which allows firmware loading to *actually* occur once userspace
has actually turned up and registered a handler :)
Michael-Luke Jones
On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> Use a hibernation and suspend notifier to disable the firmware requesting
> mechanism before a hibernation/suspend and enable it after the operation.
This avoids the problem of .resume methods calling userspace while
userspace is frozen and a resulting hang, but does it actually result in
the drivers beginning to work again? If we remove process freezing in
STR, this should just work[1] without the need to complicate things. On
the other hand, if we don't want to support these functions in the
suspend and resume methods we could just audit the kernel and remove
them all.
--
Matthew Garrett | [email protected]
On Sunday, 27 May 2007 22:49, Matthew Garrett wrote:
> On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Use a hibernation and suspend notifier to disable the firmware requesting
> > mechanism before a hibernation/suspend and enable it after the operation.
>
> This avoids the problem of .resume methods calling userspace while
> userspace is frozen and a resulting hang, but does it actually result in
> the drivers beginning to work again?
Well, this was acutally invented before you've decided to remove the freezing
of tasks from the suspend code path (which I think is a mistake, but that's
only my personal opinion, so it doesn't matter very much ;-)) and I regard it
as a workaround.
> If we remove process freezing in STR, this should just work[1] without the
> need to complicate things.
Under the (optimistic, IMO) assumption that the relevant user space task won't
block on I/O with a suspended device involved or something like this.
BTW, I know of two subsystems that want their kernel threads to be frozen for
synchronization purposes. Please see these messages:
1) https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012592.html
(plus follow up)
2) http://marc.info/?l=linux-kernel&m=117919066830575&w=2
Your patch breaks them and I suspect there are more cases like these.
Besides, there's the hibernation that needs to freeze tasks for another reason,
so it needs some way to ensure that drivers won't request firmware while
the user land is frozen.
> On the other hand, if we don't want to support these functions in the
> suspend and resume methods we could just audit the kernel and remove
> them all.
Yes, I think we should do this.
Greetings,
Rafael
On 5/27/07, Matthew Garrett <[email protected]> wrote:
> On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Use a hibernation and suspend notifier to disable the firmware requesting
> > mechanism before a hibernation/suspend and enable it after the operation.
>
> This avoids the problem of .resume methods calling userspace while
> userspace is frozen and a resulting hang, but does it actually result in
> the drivers beginning to work again? If we remove process freezing in
> STR, this should just work[1] without the need to complicate things. On
> the other hand, if we don't want to support these functions in the
> suspend and resume methods we could just audit the kernel and remove
> them all.
What exactly is the problem we see here? The timeout of the firmware loader?
What goes wrong with frozen userspace, usually there is only a netlink
message sent from the kernel, which should be received and handled
just fine when userspace is running again.
Thanks,
Kay
On Sunday, 27 May 2007 22:45, Michael-Luke Jones wrote:
> On 27 May 2007, at 21:31, Rafael J. Wysocki wrote:
>
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Use a hibernation and suspend notifier to disable the firmware
> > requesting
> > mechanism before a hibernation/suspend and enable it after the
> > operation.
> >
> > Signed-off-by: Rafael J. Wysocki <[email protected]>
> >
> > drivers/base/firmware_class.c | 36 ++++++++++++++++++++++++++++++
> > ++++++
> > 1 file changed, 36 insertions(+)
>
> I don't like this approach, as I feel that the firmware loading
> interface should be able to detect if a firmware load request is not
> being handled, due to absence of userspace / hotplug handler presence.
In principle, I agree. In practice, though, I don't know how to make this
happen.
> Other circumstances in which this can be a problem is during bootup
> when request_firmware() calls can be made before userspace is up and
> init has run (even in the presence of an initramfs).
>
> (Slightly OT: A particularly nasty race is when an initramfs
> userspace is present, but firmware loading cannot occur because init
> has not run, so proc hasn't been mounted, so a hotplug event handler
> cannot be registered, despite the fact that the firmware is sitting
> on the ramdisk mounted correctly...)
>
> In short, a more general solution would be preferred, and preferably
> one which allows firmware loading to *actually* occur once userspace
> has actually turned up and registered a handler :)
I agree, but well ... ;-)
Greetings,
Rafael
On Sunday, 27 May 2007 23:49, Kay Sievers wrote:
> On 5/27/07, Matthew Garrett <[email protected]> wrote:
> > On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <[email protected]>
> > >
> > > Use a hibernation and suspend notifier to disable the firmware requesting
> > > mechanism before a hibernation/suspend and enable it after the operation.
> >
> > This avoids the problem of .resume methods calling userspace while
> > userspace is frozen and a resulting hang, but does it actually result in
> > the drivers beginning to work again? If we remove process freezing in
> > STR, this should just work[1] without the need to complicate things. On
> > the other hand, if we don't want to support these functions in the
> > suspend and resume methods we could just audit the kernel and remove
> > them all.
>
> What exactly is the problem we see here? The timeout of the firmware loader?
> What goes wrong with frozen userspace, usually there is only a netlink
> message sent from the kernel, which should be received and handled
> just fine when userspace is running again.
Users report the timeout as a problem and it's not that straightforward to
figure out what happens.
Still, I agree it's much better if drivers don't use request_firmware() in
their .resume() routines at all, because they shouldn't rely upon user land at
this point.
Greetings,
Rafael
On Sun, May 27, 2007 at 11:45:03PM +0200, Rafael J. Wysocki wrote:
> On Sunday, 27 May 2007 22:49, Matthew Garrett wrote:
> > If we remove process freezing in STR, this should just work[1] without the
> > need to complicate things.
>
> Under the (optimistic, IMO) assumption that the relevant user space task won't
> block on I/O with a suspended device involved or something like this.
Yeah, I forgot the footnote that was going to mention that. Clearly
there are issues if this is on some block device that hasn't been
resumed yet.
> BTW, I know of two subsystems that want their kernel threads to be frozen for
> synchronization purposes. Please see these messages:
>
> 1) https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012592.html
> (plus follow up)
>
> 2) http://marc.info/?l=linux-kernel&m=117919066830575&w=2
I'm not entirely sold on this. The issue is that there's the possibility
of races during suspend/resume? It sounds like that should be
implemented in the driver, rather than depending on a side-effect of
process freezing. Otherwise there's no way of selectively suspending
that device.
> Besides, there's the hibernation that needs to freeze tasks for another reason,
> so it needs some way to ensure that drivers won't request firmware while
> the user land is frozen.
I agree that that's an issue right now, but I think we should see if
this is repairable without just breaking those drivers. One option would
be to defer resuming them until userspace is alive again - that would be
no worse than the suspend to RAM case without process freezing.
--
Matthew Garrett | [email protected]
On Sun, May 27, 2007 at 11:49:30PM +0200, Kay Sievers wrote:
> What exactly is the problem we see here? The timeout of the firmware loader?
> What goes wrong with frozen userspace, usually there is only a netlink
> message sent from the kernel, which should be received and handled
> just fine when userspace is running again.
Driver calls request_firmware in the resume method. The userspace helper
can't be run because it's been frozen, so the firmware never gets loaded
and the call times out. The driver then fails to resume. While all this
is happening, the rest of the kernel is blocking on that resume method.
The firmware can be loaded once userspace has been started again, but by
that time the driver has given up.
--
Matthew Garrett | [email protected]
On Sun, 2007-05-27 at 23:04 +0100, Matthew Garrett wrote:
> On Sun, May 27, 2007 at 11:49:30PM +0200, Kay Sievers wrote:
>
> > What exactly is the problem we see here? The timeout of the firmware loader?
> > What goes wrong with frozen userspace, usually there is only a netlink
> > message sent from the kernel, which should be received and handled
> > just fine when userspace is running again.
>
> Driver calls request_firmware in the resume method. The userspace helper
> can't be run because it's been frozen, so the firmware never gets loaded
> and the call times out. The driver then fails to resume. While all this
> is happening, the rest of the kernel is blocking on that resume method.
> The firmware can be loaded once userspace has been started again, but by
> that time the driver has given up.
Seems, that's just the broken synchronous firmware loading interface
with the useless timeout handling. The nowait version of the same loader
doesn't time out, and should not have that problem. The sync version
should be removed from the kernel, it just causes all sorts of problems
since it exists.
Userspace should handle the async request just fine when it comes back
running, regardless of the time it was submitted.
Kay
On Monday, 28 May 2007 00:01, Matthew Garrett wrote:
> On Sun, May 27, 2007 at 11:45:03PM +0200, Rafael J. Wysocki wrote:
> > On Sunday, 27 May 2007 22:49, Matthew Garrett wrote:
> > > If we remove process freezing in STR, this should just work[1] without the
> > > need to complicate things.
> >
> > Under the (optimistic, IMO) assumption that the relevant user space task won't
> > block on I/O with a suspended device involved or something like this.
>
> Yeah, I forgot the footnote that was going to mention that. Clearly
> there are issues if this is on some block device that hasn't been
> resumed yet.
Or worse. Suppose, for example, that the device which needs the firmware
uploaded is your network adapter and the firmware file is on NFS.
I don't think there's a solution to the "requesting firmware from .resume()"
problem other than copying the firmware into memory _before_ the suspend
and using this copy to upload the firmware in .resume().
> > BTW, I know of two subsystems that want their kernel threads to be frozen for
> > synchronization purposes. Please see these messages:
> >
> > 1) https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012592.html
> > (plus follow up)
> >
> > 2) http://marc.info/?l=linux-kernel&m=117919066830575&w=2
>
> I'm not entirely sold on this. The issue is that there's the possibility
> of races during suspend/resume?
Yes.
> It sounds like that should be implemented in the driver, rather than
> depending on a side-effect of process freezing. Otherwise there's no way
> of selectively suspending that device.
That's true, but we've been using the freezer for so long that at least some
drivers started to rely on it being used. That's the reason, IMO, why we can't
just drop the freezer right now.
It can be _replaced_ by some other synchronization mechanisms, but not just
dropped. Moreover, I think that to implement such mechanisms within device
drivers we should first stop using the same .suspend() and .resume() routines
for both hibernation and suspend.
This is the plan right now (ie. to stop using .suspend() and .resume() for
hibernation) and when we're done with that (yes, it'll take some time), then we
can think of dropping the freezer entirely from the suspend code path.
> > Besides, there's the hibernation that needs to freeze tasks for another reason,
> > so it needs some way to ensure that drivers won't request firmware while
> > the user land is frozen.
>
> I agree that that's an issue right now, but I think we should see if
> this is repairable without just breaking those drivers.
Well, I don't think we're breaking the drivers.
> One option would be to defer resuming them until userspace is alive again -
> that would be no worse than the suspend to RAM case without process freezing.
Please consider the example with a firmware on NFS. :-)
I think the _solution_ would be to fix the drivers. The other approaches are
just workarounds, including the $subject patch.
Greetings,
Rafael
On Monday, 28 May 2007 00:16, Kay Sievers wrote:
> On Sun, 2007-05-27 at 23:04 +0100, Matthew Garrett wrote:
> > On Sun, May 27, 2007 at 11:49:30PM +0200, Kay Sievers wrote:
> >
> > > What exactly is the problem we see here? The timeout of the firmware loader?
> > > What goes wrong with frozen userspace, usually there is only a netlink
> > > message sent from the kernel, which should be received and handled
> > > just fine when userspace is running again.
> >
> > Driver calls request_firmware in the resume method. The userspace helper
> > can't be run because it's been frozen, so the firmware never gets loaded
> > and the call times out. The driver then fails to resume. While all this
> > is happening, the rest of the kernel is blocking on that resume method.
> > The firmware can be loaded once userspace has been started again, but by
> > that time the driver has given up.
>
> Seems, that's just the broken synchronous firmware loading interface
> with the useless timeout handling. The nowait version of the same loader
> doesn't time out, and should not have that problem. The sync version
> should be removed from the kernel, it just causes all sorts of problems
> since it exists.
>
> Userspace should handle the async request just fine when it comes back
> running, regardless of the time it was submitted.
Okay, so the solution is to convert the drivers to use
request_firmware_nowait() instead of request_firmware() in their .resume()
routines.
Greetings,
Rafael
Hi.
On Sun, 2007-05-27 at 23:45 +0200, Rafael J. Wysocki wrote:
> On Sunday, 27 May 2007 22:49, Matthew Garrett wrote:
> > On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <[email protected]>
> > >
> > > Use a hibernation and suspend notifier to disable the firmware requesting
> > > mechanism before a hibernation/suspend and enable it after the operation.
> >
> > This avoids the problem of .resume methods calling userspace while
> > userspace is frozen and a resulting hang, but does it actually result in
> > the drivers beginning to work again?
>
> Well, this was acutally invented before you've decided to remove the freezing
> of tasks from the suspend code path (which I think is a mistake, but that's
> only my personal opinion, so it doesn't matter very much ;-)) and I regard it
> as a workaround.
Suspend-to-ram code paths shouldn't assume userspace is unfrozen anyway.
Doesn't [u]swsusp have a code path like Suspend2 where we can suspend to
ram after writing the hibernation image? In that case, it will still be
possible that we seek to enter and leave S3 with processes frozen.
Apologies if anyone has already mentioned this - I'm just starting to
play catchup.
Regards,
Nigel
On 28 May 2007, at 08:43, Rafael J. Wysocki wrote:
>> Seems, that's just the broken synchronous firmware loading interface
>> with the useless timeout handling. The nowait version of the same
>> loader
>> doesn't time out, and should not have that problem. The sync version
>> should be removed from the kernel, it just causes all sorts of
>> problems
>> since it exists.
>>
>> Userspace should handle the async request just fine when it comes
>> back
>> running, regardless of the time it was submitted.
>
> Okay, so the solution is to convert the drivers to use
> request_firmware_nowait() instead of request_firmware() in
> their .resume()
> routines.
[added Rob Landley to CC]
I think asynchronous loading should be made the default behaviour.
However, we need to think and document how to do firmware loading.
Firmware loading can be done at two different times:
Device Driver Load
First use of Device Driver
For a network device, this correlates to when the driver is first
loaded into memory or at 'ifconfig up' respectively.
At device driver load, firmware loading must be asynchronous. This is
because device driver init can occur before userspace runs and
registers a hotplug/uevent event handler. If device use is attempted
before firmware loads, a -ENOFIRMWARE call would be great so that
userspace and thus the user can be clearly informed what the problem is.
However, at 'first use' firmware loading, the synchronous interface
should remain. 'ifconfig up' either works or it doesn't, and as I see
it, has to just hang around until firmware turns up.
One more thing, it seems that the asynchronous firmware loading
thread just spawns a _request_firmware() call which then times out at
60 seconds. I think, if the first request fails it spawns another. 60
seconds is *far* too long for this type of thing, and this was set at
10 seconds before the last two kernel releases (which is even a bit
slow itself). Unfortunately, this appears to a case of quite senior
kernel developers pushing a bodge upstream rather than fixing the
underlying issue :( [1] [2]
Documentation for how hotplug/uevent handlers should cope with these
types of firmware loading is also *strongly* requested, in order for
lightweight but fully functional implementations to be made.
Documentation > Reference Implementation :)
Michael-Luke
[1] http://tinyurl.com/2colng (git.kernel.org)
[2] http://tinyurl.com/224h54 (redhat bugzilla)
On Mon, 2007-05-28 at 09:48 +0100, Michael-Luke Jones wrote:
> On 28 May 2007, at 08:43, Rafael J. Wysocki wrote:
>
> >> Seems, that's just the broken synchronous firmware loading interface
> >> with the useless timeout handling. The nowait version of the same
> >> loader
> >> doesn't time out, and should not have that problem. The sync version
> >> should be removed from the kernel, it just causes all sorts of
> >> problems
> >> since it exists.
> >>
> >> Userspace should handle the async request just fine when it comes
> >> back
> >> running, regardless of the time it was submitted.
> >
> > Okay, so the solution is to convert the drivers to use
> > request_firmware_nowait() instead of request_firmware() in
> > their .resume()
> > routines.
>
> [added Rob Landley to CC]
>
> I think asynchronous loading should be made the default behaviour.
> However, we need to think and document how to do firmware loading.
>
> Firmware loading can be done at two different times:
> Device Driver Load
> First use of Device Driver
>
> For a network device, this correlates to when the driver is first
> loaded into memory or at 'ifconfig up' respectively.
>
> At device driver load, firmware loading must be asynchronous. This is
> because device driver init can occur before userspace runs and
> registers a hotplug/uevent event handler. If device use is attempted
> before firmware loads, a -ENOFIRMWARE call would be great so that
> userspace and thus the user can be clearly informed what the problem is.
Why would a driver create an interface before it has the needed
firmware loaded?
> However, at 'first use' firmware loading, the synchronous interface
> should remain. 'ifconfig up' either works or it doesn't, and as I see
> it, has to just hang around until firmware turns up.
What kind of network driver does create an interface for a
non-functioning device? That sounds like a bug on its own.
If a driver binds to a device, it should just have the firmware already
loaded, and not wait until its used. What's the reason for such a
behavior, to let a driver pretend it can handle a device, but it doesn't
even know if all the needed pieces are available on the system?
> One more thing, it seems that the asynchronous firmware loading
> thread just spawns a _request_firmware() call which then times out at
> 60 seconds. I think, if the first request fails it spawns another. 60
> seconds is *far* too long for this type of thing, and this was set at
> 10 seconds before the last two kernel releases (which is even a bit
> slow itself). Unfortunately, this appears to a case of quite senior
> kernel developers pushing a bodge upstream rather than fixing the
> underlying issue :( [1] [2]
The underlying issue are the driver authors, that's not so easy to
fix. :)
Well, 10 seconds are just to short for userspace to react on some
setups, from tiny boxes which are busy, to 512 CPU boxes enumerating
thousands of devices, all had problems here. Any timeout for a
firmware-request is just a broken concept, the request should wait
forever, to be fulfilled or canceled from userspace when it's ready.
Kay
Hi.
Quick read only, but it looks good to me.
Nigel
On 28 May 2007, at 10:06, Kay Sievers wrote:
> The underlying issue are the driver authors, that's not so easy to
> fix. :)
Sorry, I know this maybe be unintentional, but comments like this
make me somewhat angry.
If there is no decent documentation as to how to do it the right way
(tm), how do you expect people to do it the right way (tm)?
> Any timeout for a
> firmware-request is just a broken concept, the request should wait
> forever, to be fulfilled or canceled from userspace when it's ready.
What I wrote above is especially true when the in-kernel APIs
themselves do things the wrong way (tm) themselves. Thus, even more
thought is required to work around this imperfect behaviour in a sane
manner. And without documentation, every single device driver author
has to go through this thought process themselves. Unsurprisingly,
they often get it wrong. But as there's no decent documentation to do
it right, it's *not* their fault. I'd suggest it's more the fault of
the core kernel devs who fail to fix up a questionable firmware
loading interface, then fail to document how to work around its
shortcomings.
Again, apologies if this sounds angry, I don't want to start an
argument. But as someone just starting out here, this kind of thing
can be very frustrating, as even with the best will in the world,
achieving the right way (tm) is basically impossible if those in the
know about what the right way (tm) is fail to share this information.
Michael-Luke Jones
Now a technical rather than emotional response...
On 28 May 2007, at 10:06, Kay Sievers wrote:
>> At device driver load, firmware loading must be asynchronous. This is
>> because device driver init can occur before userspace runs and
>> registers a hotplug/uevent event handler. If device use is attempted
>> before firmware loads, a -ENOFIRMWARE call would be great so that
>> userspace and thus the user can be clearly informed what the
>> problem is.
>
> Why would a driver create an interface before it has the needed
> firmware loaded?
A valid point. But there should be some kind of error notification if
firmware loading hasn't happened correctly rather than a permanent
asynchronous wait in which the interface fails to turn up. Possibly a
kernel information printk or something, which does not exist at the
moment.
>> However, at 'first use' firmware loading, the synchronous interface
>> should remain. 'ifconfig up' either works or it doesn't, and as I see
>> it, has to just hang around until firmware turns up.
>
> What kind of network driver does create an interface for a
> non-functioning device? That sounds like a bug on its own.
Unclear. My point was that when ifconfig up exits, the interface
should be up, not asynchronously waiting for firmware to be loaded,
then taken up in the background. Thus, firmware loading in this case
should be kept synchronous, in my opinion.
> If a driver binds to a device, it should just have the firmware
> already
> loaded, and not wait until its used. What's the reason for such a
> behavior, to let a driver pretend it can handle a device, but it
> doesn't
> even know if all the needed pieces are available on the system?
Basically, you have a device which can carry out different
functions depending on the firmware loaded into it. Driver A is
specific to this device, and loads the firmware. Driver B uses
functions exported by Driver A to carry out one particular function
of the device. Driver C uses the same functions to carry out a
totally different function on the same device, but with different
firmware loaded.
Add in multiple devices handled by Driver A, all with different
functionality, and sometimes with combinations of functionality that
can coexist, and you see that when Driver A loads it cannot possibly
know which firmware to load, but must wait for other Drivers to turn
up and be put into use. Thus it 'pretends' to handle all the devices
until it's forced to make a choice.
Yes, this is hellishly complicated. Blame Intel :)
> The underlying issue are the driver authors, that's not so easy to
> fix. :)
Addressed in previous email.
> Well, 10 seconds are just to short for userspace to react on some
> setups, from tiny boxes which are busy, to 512 CPU boxes enumerating
> thousands of devices, all had problems here. Any timeout for a
> firmware-request is just a broken concept, the request should wait
> forever, to be fulfilled or canceled from userspace when it's ready.
Absolutely. I said this in an earlier email and suggested rejecting
this patchset on the grounds that it was another bodge covering over
a fundamentally broken area of the kernel :)
> Kay
Michael-Luke Jones
Hi!
> > > > What exactly is the problem we see here? The timeout of the firmware loader?
> > > > What goes wrong with frozen userspace, usually there is only a netlink
> > > > message sent from the kernel, which should be received and handled
> > > > just fine when userspace is running again.
> > >
> > > Driver calls request_firmware in the resume method. The userspace helper
> > > can't be run because it's been frozen, so the firmware never gets loaded
> > > and the call times out. The driver then fails to resume. While all this
> > > is happening, the rest of the kernel is blocking on that resume method.
> > > The firmware can be loaded once userspace has been started again, but by
> > > that time the driver has given up.
> >
> > Seems, that's just the broken synchronous firmware loading interface
> > with the useless timeout handling. The nowait version of the same loader
> > doesn't time out, and should not have that problem. The sync version
> > should be removed from the kernel, it just causes all sorts of problems
> > since it exists.
> >
> > Userspace should handle the async request just fine when it comes back
> > running, regardless of the time it was submitted.
>
> Okay, so the solution is to convert the drivers to use
> request_firmware_nowait() instead of request_firmware() in their .resume()
> routines.
You'll just get deadlock at different level (and more rare).
Imagine disk with its firmware on NFS and NFS with its firmware on
disk.
(Or maybe firmware loader doing find /, including both disk and
NFS). Just don't call request_firmware_* from .resume().
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Monday, 28 May 2007 10:30, Nigel Cunningham wrote:
> Hi.
>
> On Sun, 2007-05-27 at 23:45 +0200, Rafael J. Wysocki wrote:
> > On Sunday, 27 May 2007 22:49, Matthew Garrett wrote:
> > > On Sun, May 27, 2007 at 10:31:53PM +0200, Rafael J. Wysocki wrote:
> > > > From: Rafael J. Wysocki <[email protected]>
> > > >
> > > > Use a hibernation and suspend notifier to disable the firmware requesting
> > > > mechanism before a hibernation/suspend and enable it after the operation.
> > >
> > > This avoids the problem of .resume methods calling userspace while
> > > userspace is frozen and a resulting hang, but does it actually result in
> > > the drivers beginning to work again?
> >
> > Well, this was acutally invented before you've decided to remove the freezing
> > of tasks from the suspend code path (which I think is a mistake, but that's
> > only my personal opinion, so it doesn't matter very much ;-)) and I regard it
> > as a workaround.
>
> Suspend-to-ram code paths shouldn't assume userspace is unfrozen anyway.
> Doesn't [u]swsusp have a code path like Suspend2 where we can suspend to
> ram after writing the hibernation image? In that case, it will still be
> possible that we seek to enter and leave S3 with processes frozen.
That's correct.
> Apologies if anyone has already mentioned this - I'm just starting to
> play catchup.
No one has and that's a valid point, I think. :-)
Greetings,
Rafael
On Monday, 28 May 2007 13:15, Pavel Machek wrote:
> Hi!
>
> > > > > What exactly is the problem we see here? The timeout of the firmware loader?
> > > > > What goes wrong with frozen userspace, usually there is only a netlink
> > > > > message sent from the kernel, which should be received and handled
> > > > > just fine when userspace is running again.
> > > >
> > > > Driver calls request_firmware in the resume method. The userspace helper
> > > > can't be run because it's been frozen, so the firmware never gets loaded
> > > > and the call times out. The driver then fails to resume. While all this
> > > > is happening, the rest of the kernel is blocking on that resume method.
> > > > The firmware can be loaded once userspace has been started again, but by
> > > > that time the driver has given up.
> > >
> > > Seems, that's just the broken synchronous firmware loading interface
> > > with the useless timeout handling. The nowait version of the same loader
> > > doesn't time out, and should not have that problem. The sync version
> > > should be removed from the kernel, it just causes all sorts of problems
> > > since it exists.
> > >
> > > Userspace should handle the async request just fine when it comes back
> > > running, regardless of the time it was submitted.
> >
> > Okay, so the solution is to convert the drivers to use
> > request_firmware_nowait() instead of request_firmware() in their .resume()
> > routines.
>
> You'll just get deadlock at different level (and more rare).
>
> Imagine disk with its firmware on NFS and NFS with its firmware on
> disk.
Yeah, I've just given the very same example in another message. :-)
I believe the only _solution_ would be to load the firmware into memory
before the suspend and use the in-RAM copy to upload it into the device
in .resume().
A PM_PRE_FREEZE notifier could be used for that just fine, BTW.
Greetings,
Rafael
On Mon, 2007-05-28 at 13:15 +0200, Pavel Machek wrote:
> Hi!
>
> > > > > What exactly is the problem we see here? The timeout of the firmware loader?
> > > > > What goes wrong with frozen userspace, usually there is only a netlink
> > > > > message sent from the kernel, which should be received and handled
> > > > > just fine when userspace is running again.
> > > >
> > > > Driver calls request_firmware in the resume method. The userspace helper
> > > > can't be run because it's been frozen, so the firmware never gets loaded
> > > > and the call times out. The driver then fails to resume. While all this
> > > > is happening, the rest of the kernel is blocking on that resume method.
> > > > The firmware can be loaded once userspace has been started again, but by
> > > > that time the driver has given up.
> > >
> > > Seems, that's just the broken synchronous firmware loading interface
> > > with the useless timeout handling. The nowait version of the same loader
> > > doesn't time out, and should not have that problem. The sync version
> > > should be removed from the kernel, it just causes all sorts of problems
> > > since it exists.
> > >
> > > Userspace should handle the async request just fine when it comes back
> > > running, regardless of the time it was submitted.
> >
> > Okay, so the solution is to convert the drivers to use
> > request_firmware_nowait() instead of request_firmware() in their .resume()
> > routines.
>
> You'll just get deadlock at different level (and more rare).
>
> Imagine disk with its firmware on NFS and NFS with its firmware on
> disk.
>
> (Or maybe firmware loader doing find /, including both disk and
> NFS). Just don't call request_firmware_* from .resume().
A driver for a bootup-critical device like this should just never
release the firmware after the first load. There is absolutely no point
in doing that.
Kay
On Mon 2007-05-28 13:24:53, Kay Sievers wrote:
> On Mon, 2007-05-28 at 13:15 +0200, Pavel Machek wrote:
> > Hi!
> >
> > > > > > What exactly is the problem we see here? The timeout of the firmware loader?
> > > > > > What goes wrong with frozen userspace, usually there is only a netlink
> > > > > > message sent from the kernel, which should be received and handled
> > > > > > just fine when userspace is running again.
> > > > >
> > > > > Driver calls request_firmware in the resume method. The userspace helper
> > > > > can't be run because it's been frozen, so the firmware never gets loaded
> > > > > and the call times out. The driver then fails to resume. While all this
> > > > > is happening, the rest of the kernel is blocking on that resume method.
> > > > > The firmware can be loaded once userspace has been started again, but by
> > > > > that time the driver has given up.
> > > >
> > > > Seems, that's just the broken synchronous firmware loading interface
> > > > with the useless timeout handling. The nowait version of the same loader
> > > > doesn't time out, and should not have that problem. The sync version
> > > > should be removed from the kernel, it just causes all sorts of problems
> > > > since it exists.
> > > >
> > > > Userspace should handle the async request just fine when it comes back
> > > > running, regardless of the time it was submitted.
> > >
> > > Okay, so the solution is to convert the drivers to use
> > > request_firmware_nowait() instead of request_firmware() in their .resume()
> > > routines.
> >
> > You'll just get deadlock at different level (and more rare).
> >
> > Imagine disk with its firmware on NFS and NFS with its firmware on
> > disk.
> >
> > (Or maybe firmware loader doing find /, including both disk and
> > NFS). Just don't call request_firmware_* from .resume().
>
> A driver for a bootup-critical device like this should just never
> release the firmware after the first load. There is absolutely no point
> in doing that.
It does not have to be _bootup-critical_ device. Problem is any device
that might be used by userspace firmware loader. And that is _any_
device.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 28 May 2007, at 12:24, Kay Sievers wrote:
> A driver for a bootup-critical device like this should just never
> release the firmware after the first load. There is absolutely no
> point
> in doing that.
Bogus argument: is a USB-Ethernet device which needs firmware loading
boot-up critical? Not on the surface, but if the device loads root
over this device, it suddenly is.
This functionality should also be written into the firmware-class
(and this fact *is* acknowledged in the sparse documentation).
Michael-Luke Jones
> This functionality should also be written into the firmware-class
> (and this fact *is* acknowledged in the sparse documentation).
Feel free to submit documentation improvements.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Mon, 2007-05-28 at 12:38 +0100, Michael-Luke Jones wrote:
> On 28 May 2007, at 12:24, Kay Sievers wrote:
>
> > A driver for a bootup-critical device like this should just never
> > release the firmware after the first load. There is absolutely no
> > point
> > in doing that.
>
> Bogus argument: is a USB-Ethernet device which needs firmware loading
> boot-up critical? Not on the surface, but if the device loads root
> over this device, it suddenly is.
Releasing loaded firmware for anything that needs to handle situations
like this just doesn't make sense.
> This functionality should also be written into the firmware-class
> (and this fact *is* acknowledged in the sparse documentation).
Yeah, but these are just words, nothing people could use. Unfortunately
the author of this document died a few years ago.
Either the whole idea of userspace firmware-loading should be considered
as a problem impossible to do right because of its unsolvable side
effects. Or at least releasing loaded firmware should be the exception
for drivers which can be sure, that the firmware is not needed in
situations we try to work around here.
Kay
On Mon, 2007-05-28 at 11:26 +0100, Michael-Luke Jones wrote:
> On 28 May 2007, at 10:06, Kay Sievers wrote:
>
> > The underlying issue are the driver authors, that's not so easy to
> > fix. :)
>
> Sorry, I know this maybe be unintentional, but comments like this
> make me somewhat angry.
It was the response to "pushing a bodge upstream", while we just try to
make things working. :)
We are fighting that broken firmware-loader model for years, but every
driver author seems to have its own idea how that should work in theory,
but they are not even willing to switch to the existing version that is
expected to work better.
> If there is no decent documentation as to how to do it the right way
> (tm), how do you expect people to do it the right way (tm)?
>
> > Any timeout for a
> > firmware-request is just a broken concept, the request should wait
> > forever, to be fulfilled or canceled from userspace when it's ready.
>
> What I wrote above is especially true when the in-kernel APIs
> themselves do things the wrong way (tm) themselves. Thus, even more
> thought is required to work around this imperfect behaviour in a sane
> manner. And without documentation, every single device driver author
> has to go through this thought process themselves. Unsurprisingly,
> they often get it wrong. But as there's no decent documentation to do
> it right, it's *not* their fault. I'd suggest it's more the fault of
> the core kernel devs who fail to fix up a questionable firmware
> loading interface, then fail to document how to work around its
> shortcomings.
>
> Again, apologies if this sounds angry, I don't want to start an
> argument. But as someone just starting out here, this kind of thing
> can be very frustrating, as even with the best will in the world,
> achieving the right way (tm) is basically impossible if those in the
> know about what the right way (tm) is fail to share this information.
In our development model, users of an interface are expected to improve
it, because nobody else really knows what's needed. That unfortunately
didn't happen so far.
We get this kind of conversation every few months since a few years now.
I wonder, if we will see code, or at least come up with a better idea
this time. :)
Kay
On 28 May 2007, at 13:01, Kay Sievers wrote:
> In our development model, users of an interface are expected to
> improve
> it, because nobody else really knows what's needed. That unfortunately
> didn't happen so far.
Thanks for responding :)
The fact that no-one is told to use the new right way (tm) in any
available documentation does not help matters improve. One issue is
that the 'asynchronous' interface seems to rely on the same 'dodgy'
timeout mechanism as the original request_firmware() call... Looks
like the lack of a maintainer should be the opportunity for a rework
of this API.
Michael-Luke Jones
On 28 May 2007, at 12:51, Kay Sievers wrote:
> Either the whole idea of userspace firmware-loading should be
> considered
> as a problem impossible to do right because of its unsolvable side
> effects. Or at least releasing loaded firmware should be the exception
> for drivers which can be sure, that the firmware is not needed in
> situations we try to work around here.
I don't believe that it is impossible. What is needed is some
thought, and maybe a loose spec.
We need to think about safe ways of solving a simple problem: what to
do when userspace is unable to respond to a kernel uevent request. We
now have a timeout, whether it be handled synchronously or in the
background. I don't think either solution is good enough.
[RFC] Possible Plan:
1) request_firmware() should stick around unmodified but be marked
__deprecated.
2) request_firmware_nowait() as it stands should probably be removed.
After all, it only has 2 in-kernel users at the moment.
3) uevent interface should be notified when userspace handlers are /
are not available so that events can be queued to be handled when
userspace does turn up and re-register a hotplug event handler.
4) request_firmware_async() introduced: asynchronous firmware loading
interface built on basis of 3, such that problems at early boot and
suspend / resume are avoided. Also request_firmware_async() taught to
retain firmware in memory by default such that subsequent calls do
not require a call to userspace if userspace is unavailable.
6) Documentation on correct firmware loading behaviour for driver
authors written, together with migration notes for existing users of
request_firmware().
7) Existing uses of request_firmware() converted.
*Grumble ahead*
Unfortunately, I don't properly understand the uevent interface, so
some of the above may be inaccurate. This is *not* my fault as there
is no decent documentation (and btw, telling me to write the
documentation is not the answer to that problem).
Michael-Luke
On Mon, 2007-05-28 at 13:26 +0100, Michael-Luke Jones wrote:
> On 28 May 2007, at 12:51, Kay Sievers wrote:
>
> > Either the whole idea of userspace firmware-loading should be
> > considered
> > as a problem impossible to do right because of its unsolvable side
> > effects. Or at least releasing loaded firmware should be the exception
> > for drivers which can be sure, that the firmware is not needed in
> > situations we try to work around here.
>
> I don't believe that it is impossible. What is needed is some
> thought, and maybe a loose spec.
Hehe, let's see. :)
> We need to think about safe ways of solving a simple problem: what to
> do when userspace is unable to respond to a kernel uevent request. We
> now have a timeout, whether it be handled synchronously or in the
> background. I don't think either solution is good enough.
>
> [RFC] Possible Plan:
>
> 1) request_firmware() should stick around unmodified but be marked
> __deprecated.
>
> 2) request_firmware_nowait() as it stands should probably be removed.
> After all, it only has 2 in-kernel users at the moment.
We should probably leave the whole firmware class alone, add something
new, and convert the current users.
> 3) uevent interface should be notified when userspace handlers are /
> are not available so that events can be queued to be handled when
> userspace does turn up and re-register a hotplug event handler.
Uevents are queued by the netlink socket buffer. If userspace can't
receice them (udevd) while it's frozen, they will just be handled (in
the right order) when userspace runs again. We can queue up thousands of
events, and it should already work that way for a long time now.
> 4) request_firmware_async() introduced: asynchronous firmware loading
> interface built on basis of 3, such that problems at early boot and
> suspend / resume are avoided. Also request_firmware_async() taught to
> retain firmware in memory by default such that subsequent calls do
> not require a call to userspace if userspace is unavailable.
Something like this makes sense, yes.
> 6) Documentation on correct firmware loading behaviour for driver
> authors written, together with migration notes for existing users of
> request_firmware().
>
> 7) Existing uses of request_firmware() converted.
>
> *Grumble ahead*
> Unfortunately, I don't properly understand the uevent interface, so
> some of the above may be inaccurate.
The event emission in the kernel and the userspace side is probably
fine. Two years ago, benh already run into exactly the same suspend
problems, and I think I addressed the problems on the uevent side.
> This is *not* my fault as there
> is no decent documentation (and btw, telling me to write the
> documentation is not the answer to that problem).
Sure, we always need at least one working example before we can even
tell what's the right way to do it. :)
Kay
Hi!
> *Grumble ahead*
> Unfortunately, I don't properly understand the uevent interface, so
> some of the above may be inaccurate. This is *not* my fault as there
> is no decent documentation (and btw, telling me to write the
Translated: "I don't know what I'm talking about, but I still like to
flame people. I'm too lazy to figure out how it currently works, but
I'll just state it is not my fault".
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> >From: Rafael J. Wysocki <[email protected]>
> >
> >Use a hibernation and suspend notifier to disable the firmware
> >requesting
> >mechanism before a hibernation/suspend and enable it after the
> >operation.
> >
> >Signed-off-by: Rafael J. Wysocki <[email protected]>
> >
> > drivers/base/firmware_class.c | 36 ++++++++++++++++++++++++++++++
> >++++++
> > 1 file changed, 36 insertions(+)
>
> I don't like this approach, as I feel that the firmware loading
> interface should be able to detect if a firmware load request is not
> being handled, due to absence of userspace / hotplug handler presence.
I don't think that's possible. If hotplug handler needs /dev/foo, but
/dev/foo is not available, it will just block waiting there.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> Use a hibernation and suspend notifier to disable the user mode helper before
> a hibernation/suspend and enable it after the operation.
Seems ok to me.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> From: Rafael J. Wysocki <[email protected]>
>
> Use a hibernation and suspend notifier to disable the firmware requesting
> mechanism before a hibernation/suspend and enable it after the operation.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
>
> drivers/base/firmware_class.c | 36 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> Index: linux-2.6.22-rc3/drivers/base/firmware_class.c
> ===================================================================
> --- linux-2.6.22-rc3.orig/drivers/base/firmware_class.c 2007-05-27 19:43:03.000000000 +0200
> +++ linux-2.6.22-rc3/drivers/base/firmware_class.c 2007-05-27 19:44:06.000000000 +0200
> @@ -17,6 +17,8 @@
> #include <linux/bitops.h>
> #include <linux/mutex.h>
> #include <linux/kthread.h>
> +#include <linux/notifier.h>
> +#include <linux/suspend.h>
>
> #include <linux/firmware.h>
> #include "base.h"
> @@ -35,6 +37,9 @@ enum {
>
> static int loading_timeout = 60; /* In seconds */
>
> +/* If set, _request_firmware() will exit immediately returning -EBUSY */
> +static int requesting_firmware_disabled;
> +
> /* fw_lock could be moved to 'struct firmware_priv' but since it is just
> * guarding for corner cases a global lock should be OK */
> static DEFINE_MUTEX(fw_lock);
> @@ -397,6 +402,9 @@ _request_firmware(const struct firmware
> struct firmware *firmware;
> int retval;
>
> + if (requesting_firmware_disabled)
> + return -EBUSY;
WARN_ON()? This should not happen, and if it does, we'll end up with
non-working device.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 28 May 2007, at 14:00, Pavel Machek wrote:
> I'm too lazy to figure out how it currently works, but
> I'll just state it is not my fault
Figuring out how a userspace/kernelspace interface works should not
rely on having to read kernelspace code. Unfortunately, in the case
of hotplug / uevents, there is no such documentation. Thus, what
kernelspace / userspace interactions actually are and what they
should be is unclear, leading to confusion over corner cases, such as
this one.
Michael-Luke Jones
Hi!
> Make it possible to register hibernation and suspend notifiers, so that
> subsystems can perform hibernation-related or suspend-related operations that
> should not be carried out by device drivers' .suspend() and
> .resume() routines.
I believe it adds _way_ too many notifiers.
> +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> + be frozen immediately
80 columns. Anyway yes, this one is needed.
> +PM_POST_THAW Tasks have just been thawed after a resume or restore
> + from a hibernation image
Symetrical with the previous one, useful for freeing firmware images, ok.
> +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> + been frozen, memory is going to be freed and devices
> + are going to be suspended.
What is this one good for?
> +PM_SNAPSHOT_FAILED The creation of hibernation image has failed. Tasks
> + will be thawed immediately.
Does this one need to be different from POST_THAW?
I do not see the need for the other chains. Notice that we do not
_want_ to have too many of them, because changing anything in the
hibernation will become impossible with 10 chains having intimate
details of suspend sequence.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Sun, 27 May 2007, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> Use a hibernation and suspend notifier to disable the firmware requesting
> mechanism before a hibernation/suspend and enable it after the operation.
You're using the PM_PRE_FREEZE and PM_POST_THAW notifiers for both this
and the userspace helper change. Is it your intention that drivers
should continue to request these services but encounter an error if the
request occurs at the wrong time? Or do you expect drivers to use the
notifier chains to know when they shouldn't make any requests?
In the second case you may have a problem, because there's no
specification about the order in which the notifiers are sent. The
service may get disabled before the driver learns it isn't available,
or the driver may think the service is once again available before it
gets enabled.
Alan Stern
On Sun, 27 May 2007, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> Make it possible to register hibernation and suspend notifiers, so that
> subsystems can perform hibernation-related or suspend-related operations that
> should not be carried out by device drivers' .suspend() and .resume() routines.
> +static DEFINE_MUTEX(pm_notifier_lock);
> +
> +static RAW_NOTIFIER_HEAD(pm_chain);
Is there any particular reason you chose to use a RAW_NOTIFIER_HEAD
with an explicit mutex instead of using a BLOCKING_NOTIFIER_HEAD?
Alan Stern
On Sun, 27 May 2007, Matthew Garrett wrote:
> > BTW, I know of two subsystems that want their kernel threads to be frozen for
> > synchronization purposes. Please see these messages:
> >
> > 1) https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012592.html
> > (plus follow up)
> >
> > 2) http://marc.info/?l=linux-kernel&m=117919066830575&w=2
>
> I'm not entirely sold on this. The issue is that there's the possibility
> of races during suspend/resume? It sounds like that should be
> implemented in the driver, rather than depending on a side-effect of
> process freezing. Otherwise there's no way of selectively suspending
> that device.
I can't speak for the second example, but there's a good reason the
first example works this way. It's not a matter of races; the problem
is that the kernel thread's job is to selectively suspend and resume
devices. We don't want it doing this while a system sleep is in
progress; it would (and in fact has, before the thread was made
freezable) cause the sleep transition to abort.
Handling it entirely from within the drivers is possible in theory.
Unfortunately the design of the PM core has not leant itself to such an
approach. Using separate callbacks for hibernation vs. STR will help,
as will Raphael's notifiers.
Alan Stern
On Mon, May 28, 2007 at 12:09:30PM -0400, Alan Stern wrote:
> I can't speak for the second example, but there's a good reason the
> first example works this way. It's not a matter of races; the problem
> is that the kernel thread's job is to selectively suspend and resume
> devices. We don't want it doing this while a system sleep is in
> progress; it would (and in fact has, before the thread was made
> freezable) cause the sleep transition to abort.
How does this work on PPC or APM systems?
--
Matthew Garrett | [email protected]
On Mon, 28 May 2007, Matthew Garrett wrote:
> On Mon, May 28, 2007 at 12:09:30PM -0400, Alan Stern wrote:
>
> > I can't speak for the second example, but there's a good reason the
> > first example works this way. It's not a matter of races; the problem
> > is that the kernel thread's job is to selectively suspend and resume
> > devices. We don't want it doing this while a system sleep is in
> > progress; it would (and in fact has, before the thread was made
> > freezable) cause the sleep transition to abort.
>
> How does this work on PPC or APM systems?
For hibernation it behaves the same as on other types of systems.
For STR it generally works okay. There was one report of suspends
aborting, and it looked like this was caused by selective resumes
originating from userspace. This seemed to be unrelated to the kernel
threads; apparently some program was running while the STR was in
progress, and causing the problem. For example, the lsusb program will
do a selective resume on every USB device as it scans through them all.
However that's just a guess, we haven't fully resolved that bug report.
The theoretical answer is that it behaves the way we want. The kernel
thread does selective resumes in response to device requests. If such
a request comes in while the system is asleep it will awaken the
system; so it's only logical that a request coming in while the system
is in the process of going to sleep should abort the suspend.
Alan Stern
On Mon, May 28, 2007 at 12:43:36PM -0400, Alan Stern wrote:
> On Mon, 28 May 2007, Matthew Garrett wrote:
> > On Mon, May 28, 2007 at 12:09:30PM -0400, Alan Stern wrote:
> >
> > > I can't speak for the second example, but there's a good reason the
> > > first example works this way. It's not a matter of races; the problem
> > > is that the kernel thread's job is to selectively suspend and resume
> > > devices. We don't want it doing this while a system sleep is in
> > > progress; it would (and in fact has, before the thread was made
> > > freezable) cause the sleep transition to abort.
> >
> > How does this work on PPC or APM systems?
>
> For hibernation it behaves the same as on other types of systems.
>
> For STR it generally works okay. There was one report of suspends
> aborting, and it looked like this was caused by selective resumes
> originating from userspace. This seemed to be unrelated to the kernel
> threads; apparently some program was running while the STR was in
> progress, and causing the problem. For example, the lsusb program will
> do a selective resume on every USB device as it scans through them all.
> However that's just a guess, we haven't fully resolved that bug report.
>
> The theoretical answer is that it behaves the way we want. The kernel
> thread does selective resumes in response to device requests. If such
> a request comes in while the system is asleep it will awaken the
> system; so it's only logical that a request coming in while the system
> is in the process of going to sleep should abort the suspend.
Ok, I guess I'm still not clear on this :) If it doesn't cause major
problems on Powermac or APM systems, why is freezing the thread
beneficial on ACPI systems?
--
Matthew Garrett | [email protected]
On Monday, 28 May 2007 15:12, Pavel Machek wrote:
> Hi!
>
> > Make it possible to register hibernation and suspend notifiers, so that
> > subsystems can perform hibernation-related or suspend-related operations that
> > should not be carried out by device drivers' .suspend() and
> > .resume() routines.
>
> I believe it adds _way_ too many notifiers.
>
> > +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> > + be frozen immediately
>
> 80 columns.
OK
> Anyway yes, this one is needed.
>
> > +PM_POST_THAW Tasks have just been thawed after a resume or restore
> > + from a hibernation image
>
> Symetrical with the previous one, useful for freeing firmware images, ok.
>
> > +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> > + been frozen, memory is going to be freed and devices
> > + are going to be suspended.
>
> What is this one good for?
Allocating memory before the hibernation, mainly. Anything done after freezing
tasks.
> > +PM_SNAPSHOT_FAILED The creation of hibernation image has failed. Tasks
> > + will be thawed immediately.
>
> Does this one need to be different from POST_THAW?
Do you mean POST_RESTORE? Well, maybe not. Yesterday I thought it would be
for a specific reason, but now I can't recall what it was. :-(
> I do not see the need for the other chains. Notice that we do not
> _want_ to have too many of them, because changing anything in the
> hibernation will become impossible with 10 chains having intimate
> details of suspend sequence.
I think the two suspend-specific might be handy.
What about:
PM_PRE_FREEZE
PM_HIBERNATION_PREPARE
PM_POST_HIBERNATION (handling failed snapshots too)
PM_POST_THAW
plus PM_RESTORE_PREPARE for the restore and
PM_SUSPEND_PREPARE
PM_POST_SUSPEND (handling failed suspends too)
for the suspend code path?
Rafael
On Monday, 28 May 2007 17:56, Alan Stern wrote:
> On Sun, 27 May 2007, Rafael J. Wysocki wrote:
>
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Make it possible to register hibernation and suspend notifiers, so that
> > subsystems can perform hibernation-related or suspend-related operations that
> > should not be carried out by device drivers' .suspend() and .resume() routines.
>
> > +static DEFINE_MUTEX(pm_notifier_lock);
> > +
> > +static RAW_NOTIFIER_HEAD(pm_chain);
>
> Is there any particular reason you chose to use a RAW_NOTIFIER_HEAD
> with an explicit mutex instead of using a BLOCKING_NOTIFIER_HEAD?
Hmm, not really. I based it on the CPU hotplug notifiers, actually.
I'll see if I can use BLOCKING_NOTIFIER_HEAD.
Greetings,
Rafael
On Monday, 28 May 2007 17:55, Alan Stern wrote:
> On Sun, 27 May 2007, Rafael J. Wysocki wrote:
>
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Use a hibernation and suspend notifier to disable the firmware requesting
> > mechanism before a hibernation/suspend and enable it after the operation.
>
> You're using the PM_PRE_FREEZE and PM_POST_THAW notifiers for both this
> and the userspace helper change. Is it your intention that drivers
> should continue to request these services but encounter an error if the
> request occurs at the wrong time? Or do you expect drivers to use the
> notifier chains to know when they shouldn't make any requests?
In fact, I'd like drivers to use notifiers to actually load the firmware into
memory before hibernation/suspend. Namely, if there's PM_PRE_FREEZE, the
driver calls request_firmware() from within the notifier and saves the firmware
in memory for future use, if need be. Later, when PM_POST_THAW comes, the
memory holding the firmware is released.
Unfortunately there are drivers that call request_firmware() directly from
.resume() which blocks until timeout expires and fails anyway. I just wanted
this to fail immediately, without waiting.
> In the second case you may have a problem, because there's no
> specification about the order in which the notifiers are sent. The
> service may get disabled before the driver learns it isn't available,
> or the driver may think the service is once again available before it
> gets enabled.
Yes.
Greetings,
Rafael
On Mon, 28 May 2007, Matthew Garrett wrote:
> On Mon, May 28, 2007 at 12:43:36PM -0400, Alan Stern wrote:
> > On Mon, 28 May 2007, Matthew Garrett wrote:
> > > On Mon, May 28, 2007 at 12:09:30PM -0400, Alan Stern wrote:
> > >
> > > > I can't speak for the second example, but there's a good reason the
> > > > first example works this way. It's not a matter of races; the problem
> > > > is that the kernel thread's job is to selectively suspend and resume
> > > > devices. We don't want it doing this while a system sleep is in
> > > > progress; it would (and in fact has, before the thread was made
> > > > freezable) cause the sleep transition to abort.
> > >
> > > How does this work on PPC or APM systems?
> >
> > For hibernation it behaves the same as on other types of systems.
> >
> > For STR it generally works okay. There was one report of suspends
> > aborting, and it looked like this was caused by selective resumes
> > originating from userspace. This seemed to be unrelated to the kernel
> > threads; apparently some program was running while the STR was in
> > progress, and causing the problem. For example, the lsusb program will
> > do a selective resume on every USB device as it scans through them all.
> > However that's just a guess, we haven't fully resolved that bug report.
> >
> > The theoretical answer is that it behaves the way we want. The kernel
> > thread does selective resumes in response to device requests. If such
> > a request comes in while the system is asleep it will awaken the
> > system; so it's only logical that a request coming in while the system
> > is in the process of going to sleep should abort the suspend.
>
> Ok, I guess I'm still not clear on this :) If it doesn't cause major
> problems on Powermac or APM systems, why is freezing the thread
> beneficial on ACPI systems?
Isn't it true, even on PPC and APM systems, that tasks are frozen for
hibernation? The difference is that they aren't frozen for STR.
Hence the answer to your question: Not freezing the thread would work
okay on ACPI systems for STR. (In fact, not freezing anything would
probably work okay for STR.) But it doesn't work with hibernation.
Alan Stern
On 5/28/07, Rafael J. Wysocki <[email protected]> wrote:
> On Monday, 28 May 2007 17:55, Alan Stern wrote:
> > You're using the PM_PRE_FREEZE and PM_POST_THAW notifiers for both this
> > and the userspace helper change. Is it your intention that drivers
> > should continue to request these services but encounter an error if the
> > request occurs at the wrong time? Or do you expect drivers to use the
> > notifier chains to know when they shouldn't make any requests?
>
> In fact, I'd like drivers to use notifiers to actually load the firmware into
> memory before hibernation/suspend. Namely, if there's PM_PRE_FREEZE, the
> driver calls request_firmware() from within the notifier and saves the firmware
> in memory for future use, if need be. Later, when PM_POST_THAW comes, the
> memory holding the firmware is released.
>
> Unfortunately there are drivers that call request_firmware() directly from
> .resume() which blocks until timeout expires and fails anyway. I just wanted
> this to fail immediately, without waiting.
Stupid question time. Wouldn't it just be easier to have
request_firmware() keep a copy of the firmware once it's been loaded?
We're not talking about a lot of memory that would be wasted, and that
way no drivers have to be changed.
Ray
Hi!
> > > I can't speak for the second example, but there's a good reason the
> > > first example works this way. It's not a matter of races; the problem
> > > is that the kernel thread's job is to selectively suspend and resume
> > > devices. We don't want it doing this while a system sleep is in
> > > progress; it would (and in fact has, before the thread was made
> > > freezable) cause the sleep transition to abort.
> >
> > How does this work on PPC or APM systems?
>
> For hibernation it behaves the same as on other types of systems.
>
> For STR it generally works okay. There was one report of suspends
> aborting, and it looked like this was caused by selective resumes
> originating from userspace. This seemed to be unrelated to the kernel
> threads; apparently some program was running while the STR was in
> progress, and causing the problem. For example, the lsusb program will
> do a selective resume on every USB device as it scans through them all.
> However that's just a guess, we haven't fully resolved that bug report.
>
> The theoretical answer is that it behaves the way we want. The kernel
> thread does selective resumes in response to device requests. If such
> a request comes in while the system is asleep it will awaken the
> system; so it's only logical that a request coming in while the system
> is in the process of going to sleep should abort the suspend.
I'd say that it shows ppc being broken. User wanted to suspend the
system, and now unrelated task did lsusb... and system will not sleep.
AFAICT it is DoS issue -- if one of your users keeps doing lsusb, root
will not be able to suspend the system.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> >In fact, I'd like drivers to use notifiers to actually
> >load the firmware into
> >memory before hibernation/suspend. Namely, if there's
> >PM_PRE_FREEZE, the
> >driver calls request_firmware() from within the
> >notifier and saves the firmware
> >in memory for future use, if need be. Later, when
> >PM_POST_THAW comes, the
> >memory holding the firmware is released.
> >
> >Unfortunately there are drivers that call
> >request_firmware() directly from
> >.resume() which blocks until timeout expires and fails
> >anyway. I just wanted
> >this to fail immediately, without waiting.
>
> Stupid question time. Wouldn't it just be easier to have
> request_firmware() keep a copy of the firmware once it's
> been loaded?
> We're not talking about a lot of memory that would be
> wasted, and that
> way no drivers have to be changed.
Actually, I like this idea. Firmware problems magically disappear.
...unless someone uses x86 emulator in userland to POST graphics card.
You can't 'cache' that. But that's separate problem.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Mon, 28 May 2007, Pavel Machek wrote:
> > The theoretical answer is that it behaves the way we want. The kernel
> > thread does selective resumes in response to device requests. If such
> > a request comes in while the system is asleep it will awaken the
> > system; so it's only logical that a request coming in while the system
> > is in the process of going to sleep should abort the suspend.
>
> I'd say that it shows ppc being broken. User wanted to suspend the
> system, and now unrelated task did lsusb... and system will not sleep.
>
> AFAICT it is DoS issue -- if one of your users keeps doing lsusb, root
> will not be able to suspend the system.
This is a matter of one's philosophy. In suspend-to-RAM, should tasks
be frozen or should I/O queues be frozen?
With the USB subsystem I have followed the approach taken by the PM
core, which is that tasks are frozen. But one can -- and Linus has on
at least one occasion -- make a good case that tasks should be left
running while only I/O is frozen. This would require the subsystem to
distinguish between a selective device suspend and a system-wide
suspend-to-RAM, so that selective resume could be enabled on demand in
one case but not the other.
It's quite doable in principle -- it's just not the technique I used.
Alan Stern
On Monday 28 May 2007, Alan Stern wrote:
> This is a matter of one's philosophy. In suspend-to-RAM, should tasks
> be frozen or should I/O queues be frozen?
>
> With the USB subsystem I have followed the approach taken by the PM
> core, which is that tasks are frozen. But one can -- and Linus has on
> at least one occasion -- make a good case that tasks should be left
> running while only I/O is frozen.
In fact that makes a heck of a lot more sense to me from the
conceptual point of view. From the hardware perspective, the
task of preparing to enter true suspend states (STR, or suspend
for ACPI; embedded systems have more options) focusses on what
I/O signals are disabled.
Once the relevant I/O signals are first idled, then disabled,
the CPU can do whatever it likes. Whether it runs or not is
purely a workload decision...
Remember too that not all systems suffer from the constraints
that ACPI decrees. In particular, it's not uncommon that some
parts of the system be active in certain suspend states. The
whole point is to turn off as much of the system as possible,
especially the high power portions, while letting work proceed.
Turning off some clocks and peripherals doesn't need to imply
turning them all off, or disabling DMA ... and should not need
to be triggered by a user (or userspace tool) explicitly saying
"go into STR".
> This would require the subsystem to
> distinguish between a selective device suspend and a system-wide
> suspend-to-RAM, so that selective resume could be enabled on demand in
> one case but not the other.
Exactly. "Selective suspend" of parts of the system is a far
more general model. It fits well with runtime power management,
degrades smoothly to states where memory goes into self-refresh
(maybe the system idle loop when NO_HZ is being effective) or
even hibernation (as discussed elsewhere).
- Dave
> It's quite doable in principle -- it's just not the technique I used.
>
> Alan Stern
>
>
On Sunday 27 May 2007 4:45 pm, Michael-Luke Jones wrote:
> (Slightly OT: A particularly nasty race is when an initramfs
> userspace is present, but firmware loading cannot occur because init
> has not run, so proc hasn't been mounted, so a hotplug event handler
> cannot be registered, despite the fact that the firmware is sitting
> on the ramdisk mounted correctly...)
I believe that the current functionality is that /sbin/hotplug is the default.
I think that if there is one in initramfs, it'll get called as a usermode
helper, even before init has run. (If it needs /proc /sys or /dev, setting
it up properly is its problem, but not an insurmountable one.)
I remember discussion of this a while back, and that it was indeed
intentional. I dunno if anybody's actually tried it.
Rob
On Tue, 29 May 2007, David Brownell wrote:
> On Monday 28 May 2007, Alan Stern wrote:
>
> > This is a matter of one's philosophy. In suspend-to-RAM, should tasks
> > be frozen or should I/O queues be frozen?
> >
> > With the USB subsystem I have followed the approach taken by the PM
> > core, which is that tasks are frozen. But one can -- and Linus has on
> > at least one occasion -- make a good case that tasks should be left
> > running while only I/O is frozen.
>
> In fact that makes a heck of a lot more sense to me from the
> conceptual point of view. From the hardware perspective, the
> task of preparing to enter true suspend states (STR, or suspend
> for ACPI; embedded systems have more options) focusses on what
> I/O signals are disabled.
>
> Once the relevant I/O signals are first idled, then disabled,
> the CPU can do whatever it likes. Whether it runs or not is
> purely a workload decision...
>
> Remember too that not all systems suffer from the constraints
> that ACPI decrees. In particular, it's not uncommon that some
> parts of the system be active in certain suspend states. The
> whole point is to turn off as much of the system as possible,
> especially the high power portions, while letting work proceed.
>
> Turning off some clocks and peripherals doesn't need to imply
> turning them all off, or disabling DMA ... and should not need
> to be triggered by a user (or userspace tool) explicitly saying
> "go into STR".
One can think of suspend-to-RAM as nothing more than a super-duper
selective suspend of all devices (including the CPU!). This
illustrates the relationship between suspend-to-RAM and runtime PM.
However there is one important distinction, the DoS issue that Pavel
raised. With true suspend-to-RAM, autoresume on demand is not enabled
-- only on remote wakeup. With runtime PM, both are enabled.
Elegant though it is, the "freeze I/O" approach suffers from
implementation awkwardness. Ideally individual drivers shouldn't have
to worry about it, but the subsystems certainly will.
Consider as an example the class of char device drivers. The only
subsystem they have in common is VFS. It would then be necessary for
every entry point into VFS to check whether a suspend-to-RAM is in
progress, and if it is, block until the suspend is over. Each one of
those entry points is on a hot path, so adding these checks is the sort
of thing one would like to avoid.
By freezing tasks we can eliminate (most) I/O requests at the source
with a single, relatively small amount of code (i.e., the
refrigerator). Freezing I/O, on the other hand, would involve many
checks dispersed throughout a large number of source files -- checks
that would have to execute even when a suspend wasn't in progress.
Alan Stern
On Monday 28 May 2007 5:06 am, Kay Sievers wrote:
> Well, 10 seconds are just to short for userspace to react on some
> setups, from tiny boxes which are busy, to 512 CPU boxes enumerating
> thousands of devices, all had problems here. Any timeout for a
> firmware-request is just a broken concept, the request should wait
> forever, to be fulfilled or canceled from userspace when it's ready.
If it's spawning a new usermode helper process, then figuring out when to give
up and exit is that process's job. If it _can't_ exec usermode helper, then
that should fail immediately.
Where does the timeout come in?
Rob
On Monday, 28 May 2007 17:56, Alan Stern wrote:
> On Sun, 27 May 2007, Rafael J. Wysocki wrote:
>
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Make it possible to register hibernation and suspend notifiers, so that
> > subsystems can perform hibernation-related or suspend-related operations that
> > should not be carried out by device drivers' .suspend() and .resume() routines.
>
> > +static DEFINE_MUTEX(pm_notifier_lock);
> > +
> > +static RAW_NOTIFIER_HEAD(pm_chain);
>
> Is there any particular reason you chose to use a RAW_NOTIFIER_HEAD
> with an explicit mutex instead of using a BLOCKING_NOTIFIER_HEAD?
Yes, this makes the patch simpler.
I have also followed the Pavel's suggestion to limit the nubmer of events.
Please have a look at the updated version of the patch (appended).
Greetings,
Rafael
---
From: Rafael J. Wysocki <[email protected]>
Make it possible to register hibernation and suspend notifiers, so that
subsystems can perform hibernation-related or suspend-related operations that
should not be carried out by device drivers' .suspend() and .resume() routines.
Signed-off-by: Rafael J. Wysocki <[email protected]>
Documentation/power/notifiers.txt | 57 ++++++++++++++++++++++++++++++++++++++
include/linux/notifier.h | 8 +++++
include/linux/suspend.h | 37 ++++++++++++++++++++++--
kernel/power/disk.c | 24 ++++++++++++----
kernel/power/main.c | 14 +++++++++
kernel/power/power.h | 10 ++++++
kernel/power/user.c | 11 +++++--
7 files changed, 150 insertions(+), 11 deletions(-)
Index: linux-2.6.22-rc3/include/linux/suspend.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/suspend.h 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/suspend.h 2007-05-29 22:52:24.000000000 +0200
@@ -54,7 +54,8 @@ struct hibernation_ops {
void (*restore_cleanup)(void);
};
-#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_PM
+#ifdef CONFIG_SOFTWARE_SUSPEND
/* kernel/power/snapshot.c */
extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
static inline void register_nosave_region(unsigned long b, unsigned long e)
@@ -72,7 +73,7 @@ extern unsigned long get_safe_page(gfp_t
extern void hibernation_set_ops(struct hibernation_ops *ops);
extern int hibernate(void);
-#else
+#else /* CONFIG_SOFTWARE_SUSPEND */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -81,7 +82,7 @@ static inline void swsusp_unset_page_fre
static inline void hibernation_set_ops(struct hibernation_ops *ops) {}
static inline int hibernate(void) { return -ENOSYS; }
-#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
+#endif /* CONFIG_SOFTWARE_SUSPEND */
void save_processor_state(void);
void restore_processor_state(void);
@@ -89,4 +90,34 @@ struct saved_context;
void __save_processor_state(struct saved_context *ctxt);
void __restore_processor_state(struct saved_context *ctxt);
+/* kernel/power/main.c */
+extern struct blocking_notifier_head pm_chain_head;
+
+static inline int register_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&pm_chain_head, nb);
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&pm_chain_head, nb);
+}
+
+#define pm_notifier(fn, pri) { \
+ static struct notifier_block fn##_nb = \
+ { .notifier_call = fn, .priority = pri }; \
+ register_pm_notifier(&fn##_nb); \
+}
+#else /* CONFIG_PM */
+static inline int register_pm_notifier(struct notifier_block *nb) {
+ return 0;
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb) {
+ return 0;
+}
+
+#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+#endif /* CONFIG_PM */
+
#endif /* _LINUX_SWSUSP_H */
Index: linux-2.6.22-rc3/kernel/power/power.h
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/power.h 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/power.h 2007-05-29 22:53:12.000000000 +0200
@@ -173,5 +173,15 @@ extern void swsusp_close(void);
extern int suspend_enter(suspend_state_t state);
struct timeval;
+/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
unsigned int, char *);
+
+/* kernel/power/main.c */
+extern struct blocking_notifier_head pm_chain_head;
+
+static inline int pm_notifier_call_chain(unsigned long val)
+{
+ return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
+ == NOTIFY_BAD) ? -EINVAL : 0;
+}
Index: linux-2.6.22-rc3/include/linux/notifier.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/notifier.h 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/notifier.h 2007-05-29 22:58:13.000000000 +0200
@@ -209,5 +209,13 @@ extern int __srcu_notifier_call_chain(st
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+/* Hibernation and suspend events */
+#define PM_PRE_FREEZE 0x0001 /* Going to freeze tasks */
+#define PM_POST_THAW 0x0002 /* Tasks have just been thawed */
+#define PM_HIBERNATION_PREPARE 0x0003 /* Tasks frozen, going to hibernate */
+#define PM_POST_HIBERNATION 0x0004 /* Hibernation finished, thawing tasks */
+#define PM_SUSPEND_PREPARE 0x0005 /* Going to suspend the system */
+#define PM_POST_SUSPEND 0x0006 /* Suspend finished */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
Index: linux-2.6.22-rc3/kernel/power/disk.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/disk.c 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/disk.c 2007-05-30 00:26:41.000000000 +0200
@@ -130,10 +130,14 @@ int hibernation_snapshot(int platform_mo
{
int error;
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ return error;
+
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (error)
- return error;
+ goto Finish;
suspend_console();
error = device_suspend(PMSG_FREEZE);
@@ -161,6 +165,8 @@ int hibernation_snapshot(int platform_mo
device_resume();
Resume_console:
resume_console();
+ Finish:
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
return error;
}
@@ -259,12 +265,17 @@ static void unprepare_processes(void)
{
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
}
static int prepare_processes(void)
{
int error = 0;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (error)
+ return error;
+
pm_prepare_console();
if (freeze_processes()) {
error = -EBUSY;
@@ -281,9 +292,12 @@ int hibernate(void)
{
int error;
+ mutex_lock(&pm_mutex);
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0))
- return -EBUSY;
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
@@ -294,7 +308,6 @@ int hibernate(void)
if (error)
goto Finish;
- mutex_lock(&pm_mutex);
if (hibernation_mode == HIBERNATION_TESTPROC) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
@@ -316,12 +329,13 @@ int hibernate(void)
swsusp_free();
}
Thaw:
- mutex_unlock(&pm_mutex);
unprepare_processes();
Finish:
free_basic_memory_bitmaps();
Exit:
atomic_inc(&snapshot_device_available);
+ Unlock:
+ mutex_unlock(&pm_mutex);
return error;
}
Index: linux-2.6.22-rc3/kernel/power/main.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/main.c 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/main.c 2007-05-29 22:54:30.000000000 +0200
@@ -24,6 +24,8 @@
#include "power.h"
+BLOCKING_NOTIFIER_HEAD(pm_chain_head);
+
/*This is just an arbitrary number */
#define FREE_PAGE_NUMBER (100)
@@ -82,6 +84,10 @@ static int suspend_prepare(suspend_state
if (!pm_ops || !pm_ops->enter)
return -EPERM;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (error)
+ return error;
+
pm_prepare_console();
if (freeze_processes()) {
@@ -89,6 +95,10 @@ static int suspend_prepare(suspend_state
goto Thaw;
}
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (error)
+ return error;
+
if ((free_pages = global_page_state(NR_FREE_PAGES))
< FREE_PAGE_NUMBER) {
pr_debug("PM: free some memory\n");
@@ -121,9 +131,11 @@ static int suspend_prepare(suspend_state
device_resume();
Resume_console:
resume_console();
+ pm_notifier_call_chain(PM_POST_SUSPEND);
Thaw:
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
return error;
}
@@ -173,8 +185,10 @@ static void suspend_finish(suspend_state
pm_finish(state);
device_resume();
resume_console();
+ pm_notifier_call_chain(PM_POST_SUSPEND);
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_THAW);
}
Index: linux-2.6.22-rc3/kernel/power/user.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/user.c 2007-05-27 19:57:21.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/user.c 2007-05-29 22:35:08.000000000 +0200
@@ -149,9 +149,13 @@ static int snapshot_ioctl(struct inode *
if (data->frozen)
break;
mutex_lock(&pm_mutex);
- if (freeze_processes()) {
- thaw_processes();
- error = -EBUSY;
+ error = pm_notifier_call_chain(PM_PRE_FREEZE);
+ if (!error) {
+ error = freeze_processes();
+ if (error) {
+ thaw_processes();
+ pm_notifier_call_chain(PM_POST_THAW);
+ }
}
mutex_unlock(&pm_mutex);
if (!error)
@@ -163,6 +167,7 @@ static int snapshot_ioctl(struct inode *
break;
mutex_lock(&pm_mutex);
thaw_processes();
+ pm_notifier_call_chain(PM_POST_THAW);
mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
Index: linux-2.6.22-rc3/Documentation/power/notifiers.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22-rc3/Documentation/power/notifiers.txt 2007-05-29 23:04:20.000000000 +0200
@@ -0,0 +1,57 @@
+Suspend notifiers
+ (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
+
+There are some operations that device drivers may want to carry out in their
+.suspend() routines, but shouldn't, because they can cause the hibernation or
+suspend to fail. For example, a driver may want to allocate a substantial amount
+of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
+swsusp's memory shrinker has run.
+
+Also, there may be some operations, that subsystems want to carry out before a
+hibernation/suspend or after a restore/resume, requiring the system to be fully
+functional, so the drivers' .suspend() and .resume() routines are not suitable
+for this purpose. For example, device drivers may want to upload firmware to
+their devices after a restore from a hibernation image, but they cannot do it by
+calling request_firmware() from their .resume() routines (user land processes
+are frozen at this point). The solution may be to load the firmware into
+memory before processes are frozen and upload it from there in the .resume()
+routine. Of course, a hibernation notifier may be used for this purpose.
+
+The subsystems that have such needs can register suspend notifiers that will be
+called upon the following events by the suspend core:
+
+PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
+ be frozen immediately
+
+PM_POST_THAW Tasks have just been thawed after a resume or restore
+ from a hibernation image
+
+PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
+ been frozen, memory is going to be freed and devices
+ are going to be suspended.
+
+PM_POST_HIBERNATION The system memory state has been restored from a
+ hibernation image or an error occured during the
+ hibernation. Device drivers' .resume() callbacks have
+ been executed, tasks will be thawed immediately.
+
+PM_SUSPEND_PREPARE The system is preparing for a suspend. Tasks have been
+ frozen, devices are going to be suspended.
+
+PM_POST_SUSPEND The system has just resumed or an error occured during
+ the suspend. Device drivers' .resume() callbacks have
+ been executed, tasks will be thawed immediately.
+
+It is generally assumed that whatever the notifiers do for PM_PRE_FREEZE,
+should be undone for PM_POST_THAW. Moreover, what is done for
+PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION (eg. if memory
+is allocated for PM_HIBERNATION_PREPARE, it should be freed for
+PM_POST_HIBERNATION). Analogously, operations performed for PM_SUSPEND_PREPARE
+should be reversed for PM_POST_SUSPEND.
+
+The hibernation and suspend notifiers are called with pm_mutex held. They are
+defined in the usual way, but their last argument is meaningless (it is always
+NULL). To register and/or unregister a suspend notifier use the functions
+register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
+include/linux/suspend.h . If you don't need to unregister the notifier, you can
+also use the pm_notifier() macro defined in include/linux/suspend.h .
On Monday 28 May 2007 4:48 am, Michael-Luke Jones wrote:
> On 28 May 2007, at 08:43, Rafael J. Wysocki wrote:
>
> >> Seems, that's just the broken synchronous firmware loading interface
> >> with the useless timeout handling. The nowait version of the same
> >> loader
> >> doesn't time out, and should not have that problem. The sync version
> >> should be removed from the kernel, it just causes all sorts of
> >> problems
> >> since it exists.
> >>
> >> Userspace should handle the async request just fine when it comes
> >> back
> >> running, regardless of the time it was submitted.
> >
> > Okay, so the solution is to convert the drivers to use
> > request_firmware_nowait() instead of request_firmware() in
> > their .resume()
> > routines.
>
> [added Rob Landley to CC]
>
> I think asynchronous loading should be made the default behaviour.
> However, we need to think and document how to do firmware loading.
I'd love to write up documentation on this if anybody can tell me what it
should say. :)
> Firmware loading can be done at two different times:
> Device Driver Load
> First use of Device Driver
>
> For a network device, this correlates to when the driver is first
> loaded into memory or at 'ifconfig up' respectively.
And for block devices ala:
https://bugs.launchpad.net/ubuntu/dapper/+source/initramfs-tools/+bug/74004
https://issues.rpath.com/browse/RPL-1350
The first use could be "mount" or open of the block device under /dev. So now
you have the mount and open syscalls returning -ENOFIRMWARE. This is not a
small change.
> At device driver load, firmware loading must be asynchronous. This is
> because device driver init can occur before userspace runs and
> registers a hotplug/uevent event handler.
Userspace doesn't have to register a hotplug handler: /sys/hotplug is the
default and if there is one in initramfs it should get called. You shouldn't
have to wait for init to run for this to be the case.
I believe the initramfs cpio archive used to get extracted just before the
attempt to exec /init out of it, and that it was moved to much earlier in the
boot process so that firmware loading could be done out of it for statically
linked drivers.
Look at do_basic_setup() in init/main.c: notice how usermodehelper_init() gets
called right before driver_init(). Ask yourself: "why did it do that"?
Notice how rest_init() forks the first kernel thread (PID 1) to run
kernel_init(), and then becomes PID 0 (the idle task). So from kernel_init()
it's ok to spawn all the new tasks we want because they can't take any
reserved PIDs.
> If device use is attempted
> before firmware loads, a -ENOFIRMWARE call would be great so that
> userspace and thus the user can be clearly informed what the problem is.
Because of course every userspace utility in the world will immediately be
rewritten to provide clear and informative error messages for this race
condition corner case.
Somehow, I can't bring myself to believe this.
> However, at 'first use' firmware loading, the synchronous interface
> should remain. 'ifconfig up' either works or it doesn't, and as I see
> it, has to just hang around until firmware turns up.
Why would you make two different code paths for module load and first use?
> Documentation for how hotplug/uevent handlers should cope with these
> types of firmware loading is also *strongly* requested, in order for
> lightweight but fully functional implementations to be made.
Happy to. I'm just trying to figure out what the correct behavior is so I can
document it.
Rob
Hi!
> +Suspend notifiers
> + (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
> +
> +There are some operations that device drivers may want to carry out in their
> +.suspend() routines, but shouldn't, because they can cause the hibernation or
> +suspend to fail. For example, a driver may want to allocate a substantial amount
> +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
> +swsusp's memory shrinker has run.
> +
> +Also, there may be some operations, that subsystems want to carry out before a
> +hibernation/suspend or after a restore/resume, requiring the system to be fully
> +functional, so the drivers' .suspend() and .resume() routines are not suitable
> +for this purpose. For example, device drivers may want to upload firmware to
> +their devices after a restore from a hibernation image, but they cannot do it by
> +calling request_firmware() from their .resume() routines (user land processes
> +are frozen at this point). The solution may be to load the firmware into
> +memory before processes are frozen and upload it from there in the .resume()
> +routine. Of course, a hibernation notifier may be used for this purpose.
> +
> +The subsystems that have such needs can register suspend notifiers that will be
> +called upon the following events by the suspend core:
> +
> +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> + be frozen immediately
Hmm, looks like bad idea if we are going to remove freezer from
suspend...?
> +PM_POST_THAW Tasks have just been thawed after a resume or restore
> + from a hibernation image
> +
> +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> + been frozen, memory is going to be freed and devices
> + are going to be suspended.
Is not PRE_FREEZE enough? We can allocate memory for drivers there,
too...
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> > >> Userspace should handle the async request just fine when it comes
> > >> back
> > >> running, regardless of the time it was submitted.
> > >
> > > Okay, so the solution is to convert the drivers to use
> > > request_firmware_nowait() instead of request_firmware() in
> > > their .resume()
> > > routines.
> >
> > [added Rob Landley to CC]
> >
> > I think asynchronous loading should be made the default behaviour.
> > However, we need to think and document how to do firmware loading.
>
> I'd love to write up documentation on this if anybody can tell me what it
> should say. :)
Well, chapter on firmware & suspend/hibernation should say something
about either loading the firmware during early suspend, or just
keeping firmware in ram from boot...
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi,
On Wednesday, 30 May 2007 17:37, Pavel Machek wrote:
> Hi!
>
> > +Suspend notifiers
> > + (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
> > +
> > +There are some operations that device drivers may want to carry out in their
> > +.suspend() routines, but shouldn't, because they can cause the hibernation or
> > +suspend to fail. For example, a driver may want to allocate a substantial amount
> > +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
> > +swsusp's memory shrinker has run.
> > +
> > +Also, there may be some operations, that subsystems want to carry out before a
> > +hibernation/suspend or after a restore/resume, requiring the system to be fully
> > +functional, so the drivers' .suspend() and .resume() routines are not suitable
> > +for this purpose. For example, device drivers may want to upload firmware to
> > +their devices after a restore from a hibernation image, but they cannot do it by
> > +calling request_firmware() from their .resume() routines (user land processes
> > +are frozen at this point). The solution may be to load the firmware into
> > +memory before processes are frozen and upload it from there in the .resume()
> > +routine. Of course, a hibernation notifier may be used for this purpose.
> > +
> > +The subsystems that have such needs can register suspend notifiers that will be
> > +called upon the following events by the suspend core:
> > +
> > +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> > + be frozen immediately
>
> Hmm, looks like bad idea if we are going to remove freezer from
> suspend...?
We need PM_PRE_FREEZE anyway and it's a different question whether or not
it'll be used for suspend (STR) too.
The timing is not the best one, but so far the freezer is in the suspend code
path and I need to take this into account.
> > +PM_POST_THAW Tasks have just been thawed after a resume or restore
> > + from a hibernation image
> > +
> > +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> > + been frozen, memory is going to be freed and devices
> > + are going to be suspended.
>
> Is not PRE_FREEZE enough? We can allocate memory for drivers there,
> too...
Well, there is a reason for not doing this. Namely, if the memory if freed on
PM_POST_HIBERNATION after the image has been created, we can use it for saving
the image (and speed up the saving).
Besides, if the freezer is dropped from the suspend code, the notifiers will be
useful to it anyway IMO, and PRE_FREEZE won't make sense in that case.
I think the rule should be: If you need to do something _before_ tasks are
frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
suspend case).
Greetings,
Rafael
On Wednesday, 30 May 2007 22:44, Rafael J. Wysocki wrote:
> Hi,
>
> On Wednesday, 30 May 2007 17:37, Pavel Machek wrote:
> > Hi!
> >
> > > +Suspend notifiers
> > > + (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
> > > +
> > > +There are some operations that device drivers may want to carry out in their
> > > +.suspend() routines, but shouldn't, because they can cause the hibernation or
> > > +suspend to fail. For example, a driver may want to allocate a substantial amount
> > > +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
> > > +swsusp's memory shrinker has run.
> > > +
> > > +Also, there may be some operations, that subsystems want to carry out before a
> > > +hibernation/suspend or after a restore/resume, requiring the system to be fully
> > > +functional, so the drivers' .suspend() and .resume() routines are not suitable
> > > +for this purpose. For example, device drivers may want to upload firmware to
> > > +their devices after a restore from a hibernation image, but they cannot do it by
> > > +calling request_firmware() from their .resume() routines (user land processes
> > > +are frozen at this point). The solution may be to load the firmware into
> > > +memory before processes are frozen and upload it from there in the .resume()
> > > +routine. Of course, a hibernation notifier may be used for this purpose.
> > > +
> > > +The subsystems that have such needs can register suspend notifiers that will be
> > > +called upon the following events by the suspend core:
> > > +
> > > +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> > > + be frozen immediately
> >
> > Hmm, looks like bad idea if we are going to remove freezer from
> > suspend...?
>
> We need PM_PRE_FREEZE anyway and it's a different question whether or not
> it'll be used for suspend (STR) too.
>
> The timing is not the best one, but so far the freezer is in the suspend code
> path and I need to take this into account.
>
> > > +PM_POST_THAW Tasks have just been thawed after a resume or restore
> > > + from a hibernation image
> > > +
> > > +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> > > + been frozen, memory is going to be freed and devices
> > > + are going to be suspended.
> >
> > Is not PRE_FREEZE enough? We can allocate memory for drivers there,
> > too...
>
> Well, there is a reason for not doing this. Namely, if the memory if freed on
> PM_POST_HIBERNATION after the image has been created, we can use it for saving
> the image (and speed up the saving).
>
> Besides, if the freezer is dropped from the suspend code, the notifiers will be
> useful to it anyway IMO, and PRE_FREEZE won't make sense in that case.
>
> I think the rule should be: If you need to do something _before_ tasks are
> frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
> been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
> suspend case).
OTOH, having considered it for a while, I think that for now I can add just
PM_PRE_FREEZE and PM_POST_THAW, as I don't have any user for the other ones.
The other events may be added in the future if need be (along with some users).
I'll post revised patches in a new thread.
Greetings,
Rafael
Hi.
On Wed, 2007-05-30 at 23:11 +0200, Rafael J. Wysocki wrote:
> On Wednesday, 30 May 2007 22:44, Rafael J. Wysocki wrote:
> > Hi,
> >
> > On Wednesday, 30 May 2007 17:37, Pavel Machek wrote:
> > > Hi!
> > >
> > > > +Suspend notifiers
> > > > + (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
> > > > +
> > > > +There are some operations that device drivers may want to carry out in their
> > > > +.suspend() routines, but shouldn't, because they can cause the hibernation or
> > > > +suspend to fail. For example, a driver may want to allocate a substantial amount
> > > > +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
> > > > +swsusp's memory shrinker has run.
> > > > +
> > > > +Also, there may be some operations, that subsystems want to carry out before a
> > > > +hibernation/suspend or after a restore/resume, requiring the system to be fully
> > > > +functional, so the drivers' .suspend() and .resume() routines are not suitable
> > > > +for this purpose. For example, device drivers may want to upload firmware to
> > > > +their devices after a restore from a hibernation image, but they cannot do it by
> > > > +calling request_firmware() from their .resume() routines (user land processes
> > > > +are frozen at this point). The solution may be to load the firmware into
> > > > +memory before processes are frozen and upload it from there in the .resume()
> > > > +routine. Of course, a hibernation notifier may be used for this purpose.
> > > > +
> > > > +The subsystems that have such needs can register suspend notifiers that will be
> > > > +called upon the following events by the suspend core:
> > > > +
> > > > +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> > > > + be frozen immediately
> > >
> > > Hmm, looks like bad idea if we are going to remove freezer from
> > > suspend...?
> >
> > We need PM_PRE_FREEZE anyway and it's a different question whether or not
> > it'll be used for suspend (STR) too.
> >
> > The timing is not the best one, but so far the freezer is in the suspend code
> > path and I need to take this into account.
> >
> > > > +PM_POST_THAW Tasks have just been thawed after a resume or restore
> > > > + from a hibernation image
> > > > +
> > > > +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> > > > + been frozen, memory is going to be freed and devices
> > > > + are going to be suspended.
> > >
> > > Is not PRE_FREEZE enough? We can allocate memory for drivers there,
> > > too...
> >
> > Well, there is a reason for not doing this. Namely, if the memory if freed on
> > PM_POST_HIBERNATION after the image has been created, we can use it for saving
> > the image (and speed up the saving).
> >
> > Besides, if the freezer is dropped from the suspend code, the notifiers will be
> > useful to it anyway IMO, and PRE_FREEZE won't make sense in that case.
> >
> > I think the rule should be: If you need to do something _before_ tasks are
> > frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
> > been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
> > suspend case).
>
> OTOH, having considered it for a while, I think that for now I can add just
> PM_PRE_FREEZE and PM_POST_THAW, as I don't have any user for the other ones.
> The other events may be added in the future if need be (along with some users).
>
> I'll post revised patches in a new thread.
I haven't been giving this much attention, so forgive me if I'm about to
ask a silly question... which notifiers would you see the avenrun
saving/restoring using?
Regards,
Nigel
Hi!
> > I think the rule should be: If you need to do something _before_ tasks are
> > frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
> > been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
> > suspend case).
>
> OTOH, having considered it for a while, I think that for now I can add just
> PM_PRE_FREEZE and PM_POST_THAW, as I don't have any user for the other ones.
> The other events may be added in the future if need be (along with some users).
>
> I'll post revised patches in a new thread.
Yes please.
(Hmm, but you may create 4 of them PM_PRE_HIBERNATION, PM_PRE_SUSPEND,
PM_POST_HIBERNATION, PM_POST_SUSPEND -- I'd call all of them before
freeze and after thaw, but if we drop freezer for suspend, we won't
have to rename anything.)
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Thursday, 31 May 2007 00:29, Pavel Machek wrote:
> Hi!
>
> > > I think the rule should be: If you need to do something _before_ tasks are
> > > frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
> > > been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
> > > suspend case).
> >
> > OTOH, having considered it for a while, I think that for now I can add just
> > PM_PRE_FREEZE and PM_POST_THAW, as I don't have any user for the other ones.
> > The other events may be added in the future if need be (along with some users).
> >
> > I'll post revised patches in a new thread.
>
> Yes please.
>
> (Hmm, but you may create 4 of them PM_PRE_HIBERNATION, PM_PRE_SUSPEND,
> PM_POST_HIBERNATION, PM_POST_SUSPEND -- I'd call all of them before
> freeze and after thaw, but if we drop freezer for suspend, we won't
> have to rename anything.)
Yes, that's a good idea. :-)
Greetings,
Rafael
Hi,
On Thursday, 31 May 2007 00:24, Nigel Cunningham wrote:
> Hi.
>
> On Wed, 2007-05-30 at 23:11 +0200, Rafael J. Wysocki wrote:
> > On Wednesday, 30 May 2007 22:44, Rafael J. Wysocki wrote:
> > > Hi,
> > >
> > > On Wednesday, 30 May 2007 17:37, Pavel Machek wrote:
> > > > Hi!
> > > >
> > > > > +Suspend notifiers
> > > > > + (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
> > > > > +
> > > > > +There are some operations that device drivers may want to carry out in their
> > > > > +.suspend() routines, but shouldn't, because they can cause the hibernation or
> > > > > +suspend to fail. For example, a driver may want to allocate a substantial amount
> > > > > +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
> > > > > +swsusp's memory shrinker has run.
> > > > > +
> > > > > +Also, there may be some operations, that subsystems want to carry out before a
> > > > > +hibernation/suspend or after a restore/resume, requiring the system to be fully
> > > > > +functional, so the drivers' .suspend() and .resume() routines are not suitable
> > > > > +for this purpose. For example, device drivers may want to upload firmware to
> > > > > +their devices after a restore from a hibernation image, but they cannot do it by
> > > > > +calling request_firmware() from their .resume() routines (user land processes
> > > > > +are frozen at this point). The solution may be to load the firmware into
> > > > > +memory before processes are frozen and upload it from there in the .resume()
> > > > > +routine. Of course, a hibernation notifier may be used for this purpose.
> > > > > +
> > > > > +The subsystems that have such needs can register suspend notifiers that will be
> > > > > +called upon the following events by the suspend core:
> > > > > +
> > > > > +PM_PRE_FREEZE The system is going to hibernate or suspend, tasks will
> > > > > + be frozen immediately
> > > >
> > > > Hmm, looks like bad idea if we are going to remove freezer from
> > > > suspend...?
> > >
> > > We need PM_PRE_FREEZE anyway and it's a different question whether or not
> > > it'll be used for suspend (STR) too.
> > >
> > > The timing is not the best one, but so far the freezer is in the suspend code
> > > path and I need to take this into account.
> > >
> > > > > +PM_POST_THAW Tasks have just been thawed after a resume or restore
> > > > > + from a hibernation image
> > > > > +
> > > > > +PM_HIBERNATION_PREPARE The system is preparing for hibernation. Tasks have
> > > > > + been frozen, memory is going to be freed and devices
> > > > > + are going to be suspended.
> > > >
> > > > Is not PRE_FREEZE enough? We can allocate memory for drivers there,
> > > > too...
> > >
> > > Well, there is a reason for not doing this. Namely, if the memory if freed on
> > > PM_POST_HIBERNATION after the image has been created, we can use it for saving
> > > the image (and speed up the saving).
> > >
> > > Besides, if the freezer is dropped from the suspend code, the notifiers will be
> > > useful to it anyway IMO, and PRE_FREEZE won't make sense in that case.
> > >
> > > I think the rule should be: If you need to do something _before_ tasks are
> > > frozen, do it in PM_PRE_FREEZE, but if you can do that after the tasks have
> > > been frozen, do it on PM_HIBERNATION_PREPARE (or PM_SUSPEND_PREPARE in the
> > > suspend case).
> >
> > OTOH, having considered it for a while, I think that for now I can add just
> > PM_PRE_FREEZE and PM_POST_THAW, as I don't have any user for the other ones.
> > The other events may be added in the future if need be (along with some users).
> >
> > I'll post revised patches in a new thread.
>
> I haven't been giving this much attention, so forgive me if I'm about to
> ask a silly question... which notifiers would you see the avenrun
> saving/restoring using?
I'm not sure what you mean. Could you please clarify?
Greetings,
Rafael
Hi!
> > > I'll post revised patches in a new thread.
> >
> > I haven't been giving this much attention, so forgive me if I'm about to
> > ask a silly question... which notifiers would you see the avenrun
> > saving/restoring using?
>
> I'm not sure what you mean. Could you please clarify?
Remember that patch about saving/restoring "load average"? I guess
Nigel asks where to hook it.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi,
On Thursday, 31 May 2007 16:23, Pavel Machek wrote:
> Hi!
>
> > > > I'll post revised patches in a new thread.
> > >
> > > I haven't been giving this much attention, so forgive me if I'm about to
> > > ask a silly question... which notifiers would you see the avenrun
> > > saving/restoring using?
> >
> > I'm not sure what you mean. Could you please clarify?
>
> Remember that patch about saving/restoring "load average"? I guess
> Nigel asks where to hook it.
Ah, okay, thanks.
Hmm, that would be PM_PRE_HIBERNATION, I think.
Rafael
Hi.
On Thu, 2007-05-31 at 22:02 +0200, Rafael J. Wysocki wrote:
> Hi,
>
> On Thursday, 31 May 2007 16:23, Pavel Machek wrote:
> > Hi!
> >
> > > > > I'll post revised patches in a new thread.
> > > >
> > > > I haven't been giving this much attention, so forgive me if I'm about to
> > > > ask a silly question... which notifiers would you see the avenrun
> > > > saving/restoring using?
> > >
> > > I'm not sure what you mean. Could you please clarify?
> >
> > Remember that patch about saving/restoring "load average"? I guess
> > Nigel asks where to hook it.
>
> Ah, okay, thanks.
>
> Hmm, that would be PM_PRE_HIBERNATION, I think.
Yeah. Okee doke.
Thanks!
Nigel
Hi,
This is the second revision of the patches that introduce hibernation and
suspend notifiers.
Generally, I have followed the Alan's suggestion to use a blocking notifier
chain and the Pavel's suggestion to limit the number of events. Also, I've
dropped the patch to disable the requesting of firmware.
Comments welcome.
Greetings,
Rafael
--
"Premature optimization is the root of all evil." - Donald Knuth
From: Rafael J. Wysocki <[email protected]>
Use a hibernation and suspend notifier to disable the user mode helper before
a hibernation/suspend and enable it after the operation.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
kernel/kmod.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
Index: linux-2.6.22-rc3/kernel/kmod.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/kmod.c 2007-05-31 00:00:37.000000000 +0200
+++ linux-2.6.22-rc3/kernel/kmod.c 2007-06-02 00:01:47.000000000 +0200
@@ -33,6 +33,8 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/resource.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
#include <asm/uaccess.h>
extern int max_threads;
@@ -46,6 +48,14 @@ static struct workqueue_struct *khelper_
*/
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
+/*
+ * If set, both call_usermodehelper_keys() and call_usermodehelper_pipe() exit
+ * immediately returning -EBUSY. Used for preventing user land processes from
+ * being created after the user land has been frozen during a system-wide
+ * hibernation or suspend operation.
+ */
+static int usermodehelper_disabled;
+
/**
* request_module - try to load a kernel module
* @fmt: printf style format string for the name of the module
@@ -251,6 +261,24 @@ static void __call_usermodehelper(struct
complete(sub_info->complete);
}
+static int usermodehelper_pm_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *ignored)
+{
+ switch (action) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ usermodehelper_disabled = 1;
+ return NOTIFY_OK;
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ usermodehelper_disabled = 0;
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
/**
* call_usermodehelper_keys - start a usermode application
* @path: pathname for the application
@@ -276,7 +304,7 @@ int call_usermodehelper_keys(char *path,
struct subprocess_info *sub_info;
int retval;
- if (!khelper_wq)
+ if (!khelper_wq || usermodehelper_disabled)
return -EBUSY;
if (path[0] == '\0')
@@ -319,7 +347,7 @@ int call_usermodehelper_pipe(char *path,
};
struct file *f;
- if (!khelper_wq)
+ if (!khelper_wq || usermodehelper_disabled)
return -EBUSY;
if (path[0] == '\0')
@@ -347,4 +375,5 @@ void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
BUG_ON(!khelper_wq);
+ pm_notifier(usermodehelper_pm_callback, 0);
}
From: Rafael J. Wysocki <[email protected]>
Make it possible to register hibernation and suspend notifiers, so that
subsystems can perform hibernation-related or suspend-related operations that
should not be carried out by device drivers' .suspend() and .resume() routines.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
Documentation/power/notifiers.txt | 50 ++++++++++++++++++++++++++++++++++++++
include/linux/notifier.h | 6 ++++
include/linux/suspend.h | 37 +++++++++++++++++++++++++---
kernel/power/disk.c | 16 +++++++++---
kernel/power/main.c | 9 ++++++
kernel/power/power.h | 10 +++++++
kernel/power/user.c | 11 ++++++--
7 files changed, 129 insertions(+), 10 deletions(-)
Index: linux-2.6.22-rc3/include/linux/suspend.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/suspend.h 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/suspend.h 2007-06-01 22:55:01.000000000 +0200
@@ -54,7 +54,8 @@ struct hibernation_ops {
void (*restore_cleanup)(void);
};
-#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+#ifdef CONFIG_PM
+#ifdef CONFIG_SOFTWARE_SUSPEND
/* kernel/power/snapshot.c */
extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
static inline void register_nosave_region(unsigned long b, unsigned long e)
@@ -72,7 +73,7 @@ extern unsigned long get_safe_page(gfp_t
extern void hibernation_set_ops(struct hibernation_ops *ops);
extern int hibernate(void);
-#else
+#else /* CONFIG_SOFTWARE_SUSPEND */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -81,7 +82,7 @@ static inline void swsusp_unset_page_fre
static inline void hibernation_set_ops(struct hibernation_ops *ops) {}
static inline int hibernate(void) { return -ENOSYS; }
-#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
+#endif /* CONFIG_SOFTWARE_SUSPEND */
void save_processor_state(void);
void restore_processor_state(void);
@@ -89,4 +90,34 @@ struct saved_context;
void __save_processor_state(struct saved_context *ctxt);
void __restore_processor_state(struct saved_context *ctxt);
+/* kernel/power/main.c */
+extern struct blocking_notifier_head pm_chain_head;
+
+static inline int register_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&pm_chain_head, nb);
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&pm_chain_head, nb);
+}
+
+#define pm_notifier(fn, pri) { \
+ static struct notifier_block fn##_nb = \
+ { .notifier_call = fn, .priority = pri }; \
+ register_pm_notifier(&fn##_nb); \
+}
+#else /* CONFIG_PM */
+static inline int register_pm_notifier(struct notifier_block *nb) {
+ return 0;
+}
+
+static inline int unregister_pm_notifier(struct notifier_block *nb) {
+ return 0;
+}
+
+#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+#endif /* CONFIG_PM */
+
#endif /* _LINUX_SWSUSP_H */
Index: linux-2.6.22-rc3/kernel/power/power.h
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/power.h 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/power.h 2007-06-01 22:55:01.000000000 +0200
@@ -173,5 +173,15 @@ extern void swsusp_close(void);
extern int suspend_enter(suspend_state_t state);
struct timeval;
+/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
unsigned int, char *);
+
+/* kernel/power/main.c */
+extern struct blocking_notifier_head pm_chain_head;
+
+static inline int pm_notifier_call_chain(unsigned long val)
+{
+ return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
+ == NOTIFY_BAD) ? -EINVAL : 0;
+}
Index: linux-2.6.22-rc3/include/linux/notifier.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/notifier.h 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/include/linux/notifier.h 2007-06-01 23:01:38.000000000 +0200
@@ -209,5 +209,11 @@ extern int __srcu_notifier_call_chain(st
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+/* Hibernation and suspend events */
+#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */
+#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */
+#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */
+#define PM_POST_SUSPEND 0x0004 /* Suspend finished */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
Index: linux-2.6.22-rc3/kernel/power/disk.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/disk.c 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/disk.c 2007-06-01 23:05:11.000000000 +0200
@@ -281,9 +281,16 @@ int hibernate(void)
{
int error;
+ mutex_lock(&pm_mutex);
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0))
- return -EBUSY;
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
+
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ goto Exit;
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
@@ -294,7 +301,6 @@ int hibernate(void)
if (error)
goto Finish;
- mutex_lock(&pm_mutex);
if (hibernation_mode == HIBERNATION_TESTPROC) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
@@ -316,12 +322,14 @@ int hibernate(void)
swsusp_free();
}
Thaw:
- mutex_unlock(&pm_mutex);
unprepare_processes();
Finish:
free_basic_memory_bitmaps();
Exit:
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
atomic_inc(&snapshot_device_available);
+ Unlock:
+ mutex_unlock(&pm_mutex);
return error;
}
Index: linux-2.6.22-rc3/kernel/power/main.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/main.c 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/main.c 2007-06-01 23:08:43.000000000 +0200
@@ -24,6 +24,8 @@
#include "power.h"
+BLOCKING_NOTIFIER_HEAD(pm_chain_head);
+
/*This is just an arbitrary number */
#define FREE_PAGE_NUMBER (100)
@@ -82,6 +84,10 @@ static int suspend_prepare(suspend_state
if (!pm_ops || !pm_ops->enter)
return -EPERM;
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (error)
+ goto Finish;
+
pm_prepare_console();
if (freeze_processes()) {
@@ -124,6 +130,8 @@ static int suspend_prepare(suspend_state
Thaw:
thaw_processes();
pm_restore_console();
+ Finish:
+ pm_notifier_call_chain(PM_POST_SUSPEND);
return error;
}
@@ -175,6 +183,7 @@ static void suspend_finish(suspend_state
resume_console();
thaw_processes();
pm_restore_console();
+ pm_notifier_call_chain(PM_POST_SUSPEND);
}
Index: linux-2.6.22-rc3/kernel/power/user.c
===================================================================
--- linux-2.6.22-rc3.orig/kernel/power/user.c 2007-05-31 00:00:38.000000000 +0200
+++ linux-2.6.22-rc3/kernel/power/user.c 2007-06-01 23:15:58.000000000 +0200
@@ -149,10 +149,14 @@ static int snapshot_ioctl(struct inode *
if (data->frozen)
break;
mutex_lock(&pm_mutex);
- if (freeze_processes()) {
- thaw_processes();
- error = -EBUSY;
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (!error) {
+ error = freeze_processes();
+ if (error)
+ thaw_processes();
}
+ if (error)
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
mutex_unlock(&pm_mutex);
if (!error)
data->frozen = 1;
@@ -163,6 +167,7 @@ static int snapshot_ioctl(struct inode *
break;
mutex_lock(&pm_mutex);
thaw_processes();
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
Index: linux-2.6.22-rc3/Documentation/power/notifiers.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.22-rc3/Documentation/power/notifiers.txt 2007-06-01 23:14:59.000000000 +0200
@@ -0,0 +1,50 @@
+Suspend notifiers
+ (C) 2007 Rafael J. Wysocki <[email protected]>, GPL
+
+There are some operations that device drivers may want to carry out in their
+.suspend() routines, but shouldn't, because they can cause the hibernation or
+suspend to fail. For example, a driver may want to allocate a substantial amount
+of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
+swsusp's memory shrinker has run.
+
+Also, there may be some operations, that subsystems want to carry out before a
+hibernation/suspend or after a restore/resume, requiring the system to be fully
+functional, so the drivers' .suspend() and .resume() routines are not suitable
+for this purpose. For example, device drivers may want to upload firmware to
+their devices after a restore from a hibernation image, but they cannot do it by
+calling request_firmware() from their .resume() routines (user land processes
+are frozen at this point). The solution may be to load the firmware into
+memory before processes are frozen and upload it from there in the .resume()
+routine. Of course, a hibernation notifier may be used for this purpose.
+
+The subsystems that have such needs can register suspend notifiers that will be
+called upon the following events by the suspend core:
+
+PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will
+ be frozen immediately.
+
+PM_POST_HIBERNATION The system memory state has been restored from a
+ hibernation image or an error occured during the
+ hibernation. Device drivers' .resume() callbacks have
+ been executed and tasks have been thawed.
+
+PM_SUSPEND_PREPARE The system is preparing for a suspend.
+
+PM_POST_SUSPEND The system has just resumed or an error occured during
+ the suspend. Device drivers' .resume() callbacks have
+ been executed and tasks have been thawed.
+
+It is generally assumed that whatever the notifiers do for
+PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously,
+operations performed for PM_SUSPEND_PREPARE should be reversed for
+PM_POST_SUSPEND. Additionally, all of the notifiers are called for
+PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
+all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
+PM_SUSPEND_PREPARE.
+
+The hibernation and suspend notifiers are called with pm_mutex held. They are
+defined in the usual way, but their last argument is meaningless (it is always
+NULL). To register and/or unregister a suspend notifier use the functions
+register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
+include/linux/suspend.h . If you don't need to unregister the notifier, you can
+also use the pm_notifier() macro defined in include/linux/suspend.h .
Hi!
> From: Rafael J. Wysocki <[email protected]>
>
> Make it possible to register hibernation and suspend notifiers, so that
> subsystems can perform hibernation-related or suspend-related operations that
> should not be carried out by device drivers' .suspend() and .resume() routines.
One more question: what will we call in suspend-to-both case? I do not
think we can call SUSPEND_PREPARE with frozen tasks, so just one call
to HIBERNATION_PREPARE?
> Signed-off-by: Rafael J. Wysocki <[email protected]>
Ack.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Hi!
> From: Rafael J. Wysocki <[email protected]>
>
> Use a hibernation and suspend notifier to disable the user mode helper before
> a hibernation/suspend and enable it after the operation.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
ACK.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Sunday, 3 June 2007 18:41, Pavel Machek wrote:
> Hi!
>
> > From: Rafael J. Wysocki <[email protected]>
> >
> > Make it possible to register hibernation and suspend notifiers, so that
> > subsystems can perform hibernation-related or suspend-related operations that
> > should not be carried out by device drivers' .suspend() and .resume() routines.
>
> One more question: what will we call in suspend-to-both case? I do not
> think we can call SUSPEND_PREPARE with frozen tasks, so just one call
> to HIBERNATION_PREPARE?
Yes, right now it should work this way.
> > Signed-off-by: Rafael J. Wysocki <[email protected]>
> Ack.
Thanks!
Rafael
--
"Premature optimization is the root of all evil." - Donald Knuth
On Mon 2007-06-04 00:38:53, Rafael J. Wysocki wrote:
> On Sunday, 3 June 2007 18:41, Pavel Machek wrote:
> > Hi!
> >
> > > From: Rafael J. Wysocki <[email protected]>
> > >
> > > Make it possible to register hibernation and suspend notifiers, so that
> > > subsystems can perform hibernation-related or suspend-related operations that
> > > should not be carried out by device drivers' .suspend() and .resume() routines.
> >
> > One more question: what will we call in suspend-to-both case? I do not
> > think we can call SUSPEND_PREPARE with frozen tasks, so just one call
> > to HIBERNATION_PREPARE?
>
> Yes, right now it should work this way.
(Should it be mentioned in docuementation?)
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Monday, 4 June 2007 00:59, Pavel Machek wrote:
> On Mon 2007-06-04 00:38:53, Rafael J. Wysocki wrote:
> > On Sunday, 3 June 2007 18:41, Pavel Machek wrote:
> > > Hi!
> > >
> > > > From: Rafael J. Wysocki <[email protected]>
> > > >
> > > > Make it possible to register hibernation and suspend notifiers, so that
> > > > subsystems can perform hibernation-related or suspend-related operations that
> > > > should not be carried out by device drivers' .suspend() and .resume() routines.
> > >
> > > One more question: what will we call in suspend-to-both case? I do not
> > > think we can call SUSPEND_PREPARE with frozen tasks, so just one call
> > > to HIBERNATION_PREPARE?
> >
> > Yes, right now it should work this way.
>
> (Should it be mentioned in docuementation?)
Eventually, yes. Still, for now, the STR code in user.c is outdated and needs
fixing. I'll take care of it later this week.
Greetings,
Rafael
--
"Premature optimization is the root of all evil." - Donald Knuth
Hi!
> > > The theoretical answer is that it behaves the way we want. The kernel
> > > thread does selective resumes in response to device requests. If such
> > > a request comes in while the system is asleep it will awaken the
> > > system; so it's only logical that a request coming in while the system
> > > is in the process of going to sleep should abort the suspend.
> >
> > I'd say that it shows ppc being broken. User wanted to suspend the
> > system, and now unrelated task did lsusb... and system will not sleep.
> >
> > AFAICT it is DoS issue -- if one of your users keeps doing lsusb, root
> > will not be able to suspend the system.
>
> This is a matter of one's philosophy. In suspend-to-RAM, should tasks
> be frozen or should I/O queues be frozen?
>
> With the USB subsystem I have followed the approach taken by the PM
> core, which is that tasks are frozen. But one can -- and Linus has on
> at least one occasion -- make a good case that tasks should be left
> running while only I/O is frozen. This would require the subsystem to
> distinguish between a selective device suspend and a system-wide
> suspend-to-RAM, so that selective resume could be enabled on demand in
> one case but not the other.
>
> It's quite doable in principle -- it's just not the technique I used.
I guess we need to do that. Random user should not be able to prevent
machine from sleeping.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Mon, 4 Jun 2007, Pavel Machek wrote:
> > With the USB subsystem I have followed the approach taken by the PM
> > core, which is that tasks are frozen. But one can -- and Linus has on
> > at least one occasion -- make a good case that tasks should be left
> > running while only I/O is frozen. This would require the subsystem to
> > distinguish between a selective device suspend and a system-wide
> > suspend-to-RAM, so that selective resume could be enabled on demand in
> > one case but not the other.
> >
> > It's quite doable in principle -- it's just not the technique I used.
>
> I guess we need to do that. Random user should not be able to prevent
> machine from sleeping.
Just to be clear about this, let's agree that we're talking about
suspend-to-RAM here, not hibernation.
It boils down to whether we want to freeze user tasks. As I recall,
Linus said that he didn't have any big objection to freezing user
threads; he was much more concerned about freezing kernel threads.
Thanks to Raphael's new notifier chains this will no longer be an
issue, since kernel threads will be able to stop themselves when they
receive a suspend notification.
There may remain some obscure difficulties in discerning whether a
particular thread should be classified as user or kernel, but let's
ignore them.
Even if we don't actively freeze user threads, approximately the same
effect can be achieved in the following way: Change the main kernel
entry points so that any thread performing a system call during a
suspend will get frozen until the suspend is over. Threads that run
entirely in userspace will continue doing useful work as before, and
kernel threads won't be affected at all. (Not that I think it's
necessary to do this; it's just a way to avoid freezing user tasks
until they need it.)
One way or another, freezing user tasks should not be a big deal.
After all, once the suspend is complete eveything will effectively be
frozen anyway. I suppose there might be issues involving tasks which
need to run in order to complete the suspend -- IMO any such issues
should be handled by carrying out the necessary actions before the
point where we now start up the freezer.
The alternative is to have drivers take over the burden. I don't like
this at all. The most obvious disadvantage is that the necessary
checks would have to be duplicated many many times and spread out over
lots of drivers.
It's also harder to handle these things at the driver level. Suppose a
driver gets an I/O request while a suspend is underway. What should it
do? Return an error? Block until the suspend is over? Both
approaches have their difficulties:
Returning an error would mean that suspend is no longer transparent.
Even an error like -EAGAIN.
Waiting until the suspend is over is likely to be impractical. At a
minimum it would involve adding code to drop a lock or mutex, enter the
freezer (or its equivalent), and then restart the I/O operation. And
then, what if the driver was invoked with O_NONBLOCK?
I think it is much better overall to stop I/O requests from being
generated at the source, either by freezing userspace or preventing it
from making system calls. It's hard to imagine that anybody would
miss the small amount of CPU time they'd be giving up by not allowing
user threads to run during the time that a suspend is underway!
Alan Stern
Hi!
> > > With the USB subsystem I have followed the approach taken by the PM
> > > core, which is that tasks are frozen. But one can -- and Linus has on
> > > at least one occasion -- make a good case that tasks should be left
> > > running while only I/O is frozen. This would require the subsystem to
> > > distinguish between a selective device suspend and a system-wide
> > > suspend-to-RAM, so that selective resume could be enabled on demand in
> > > one case but not the other.
> > >
> > > It's quite doable in principle -- it's just not the technique I used.
> >
> > I guess we need to do that. Random user should not be able to prevent
> > machine from sleeping.
>
> Just to be clear about this, let's agree that we're talking about
> suspend-to-RAM here, not hibernation.
Yes.
> It boils down to whether we want to freeze user tasks. As I recall,
> Linus said that he didn't have any big objection to freezing user
> threads; he was much more concerned about freezing kernel threads.
> Thanks to Raphael's new notifier chains this will no longer be an
> issue, since kernel threads will be able to stop themselves when they
> receive a suspend notification.
...
> The alternative is to have drivers take over the burden. I don't like
> this at all. The most obvious disadvantage is that the necessary
> checks would have to be duplicated many many times and spread out over
> lots of drivers.
I like freezer better :-).
> It's also harder to handle these things at the driver level. Suppose a
> driver gets an I/O request while a suspend is underway. What should it
> do? Return an error? Block until the suspend is over? Both
> approaches have their difficulties:
>
> Returning an error would mean that suspend is no longer transparent.
> Even an error like -EAGAIN.
No, -EAGAIN is not nice.
> Waiting until the suspend is over is likely to be impractical. At a
> minimum it would involve adding code to drop a lock or mutex, enter the
> freezer (or its equivalent), and then restart the I/O operation. And
> then, what if the driver was invoked with O_NONBLOCK?
Blocking would be possible option. I agree it is tricky to
implement... it may also be useful for a harddrive:
"I'm riding a horse at 40kph now, so you'll kill the harddrive if you
access it; just freeze everyone until we are at the other end of
meadow".
...hmm, but this seems to be blockdevice specific, and I can't think
of a network or char driver where similar behaviour would be useful.
> I think it is much better overall to stop I/O requests from being
> generated at the source, either by freezing userspace or preventing it
> from making system calls. It's hard to imagine that anybody would
> miss the small amount of CPU time they'd be giving up by not allowing
> user threads to run during the time that a suspend is underway!
Agreed.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html