The following patch fixes a deadlock experienced when devices are
being added to a bus both from a user process and eventd process.
The eventd process was hung waiting on dev->bus->subsys.rwsem, which
was held by another process. That other process was in turn hung
because it called call_usermodehelper directly and was waiting for
work scheduled on the eventd workqueue to complete. The patch fixes
this by deferring the kobject_hotplug work, running it from eventd
when possible.
Backtraces of the hang:
0xc0000000017df300 1 0 0 0 D 0xc0000000017df7b0
swapper
SP(esp) PC(eip) Function(args)
0xc00000003fc9f460 0x0000000000000000 NO_SYMBOL or Userspace
0xc00000003fc9f4f0 0xc000000000058c40 .schedule +0xb4
0xc00000003fc9f5c0 0xc00000000005a464 .wait_for_completion +0x138
0xc00000003fc9f6c0 0xc00000000007c594 .call_usermodehelper +0x104
0xc00000003fc9f810 0xc00000000022d3e8 .kobject_hotplug +0x3c4
0xc00000003fc9f900 0xc00000000022d67c .kobject_add +0x134
0xc00000003fc9f9a0 0xc00000000012b3d8 .register_disk +0x70
0xc00000003fc9fa40 0xc00000000027dfe4 .add_disk +0x60
0xc00000003fc9fad0 0xc0000000002dc7dc .sd_probe +0x290
0xc00000003fc9fb80 0xc00000000026fbe8 .bus_match +0x94
0xc00000003fc9fc10 0xc00000000026ff70 .driver_attach +0x8c
0xc00000003fc9fca0 0xc000000000270104 .bus_add_driver +0x110
0xc00000003fc9fd50 0xc000000000270a18 .driver_register +0x38
0xc00000003fc9fdd0 0xc0000000002cd8f8 .scsi_register_driver +0x28
0xc00000003fc9fe50 0xc0000000004941d8 .init_sd +0x8c
0xc00000003fc9fee0 0xc00000000000c720 .init +0x25c
0xc00000003fc9ff90 0xc0000000000183ec .kernel_thread +0x4c
0xc00000003fab3380 4 1 0 0 D 0xc00000003fab3830
events/0
SP(esp) PC(eip) Function(args)
0xc00000003faaf6e0 0x0000000000000000 NO_SYMBOL or Userspace
0xc00000003faaf770 0xc000000000058c40 .schedule +0xb4
0xc00000003faaf840 0xc00000000022fa20 .rwsem_down_write_failed +0x14c
0xc00000003faaf910 0xc00000000026fed0 .bus_add_device +0x11c
0xc00000003faaf9b0 0xc00000000026e288 .device_add +0xd0
0xc00000003faafa50 0xc0000000002cdb00 .scsi_sysfs_add_sdev +0x8c
0xc00000003faafb00 0xc0000000002cbff8 .scsi_probe_and_add_lun +0xb04
0xc00000003faafc00 0xc0000000002ccca0 .scsi_add_device +0x90
0xc00000003faafcb0 0xc0000000002d9458 .ipr_worker_thread +0xc60
0xc00000003faafdc0 0xc00000000007cd9c .worker_thread +0x268
0xc00000003faafee0 0xc0000000000839cc .kthread +0x160
0xc00000003faaff90 0xc0000000000183ec .kernel_thread +0x4c
---
diff -puN lib/kobject.c~kobject_hotplug_hang lib/kobject.c
--- linux-2.6.5/lib/kobject.c~kobject_hotplug_hang Wed Apr 7 15:48:14 2004
+++ linux-2.6.5-bjking1/lib/kobject.c Thu Apr 8 18:27:01 2004
@@ -103,8 +103,14 @@ static void fill_kobj_path(struct kset *
static unsigned long sequence_num;
static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED;
-static void kset_hotplug(const char *action, struct kset *kset,
- struct kobject *kobj)
+struct hotplug_work {
+ struct work_struct work;
+ const char *action;
+ struct kset *kset;
+ struct kobject *kobj;
+};
+
+static void kset_hotplug_work(void *data)
{
char *argv [3];
char **envp = NULL;
@@ -116,22 +122,26 @@ static void kset_hotplug(const char *act
char *kobj_path = NULL;
char *name = NULL;
unsigned long seq;
+ struct hotplug_work *work = (struct hotplug_work *)data;
+ const char *action = work->action;
+ struct kset *kset = work->kset;
+ struct kobject *kobj = work->kobj;
/* If the kset has a filter operation, call it. If it returns
failure, no hotplug event is required. */
if (kset->hotplug_ops->filter) {
if (!kset->hotplug_ops->filter(kset, kobj))
- return;
+ goto exit;
}
pr_debug ("%s\n", __FUNCTION__);
if (!hotplug_path[0])
- return;
+ goto exit;
envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL);
if (!envp)
- return;
+ goto exit;
memset (envp, 0x00, NUM_ENVP * sizeof (char *));
buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
@@ -193,10 +203,40 @@ static void kset_hotplug(const char *act
__FUNCTION__, retval);
exit:
+ kset_put(kset);
+ kobject_put(kobj);
kfree(kobj_path);
+ kfree(work);
kfree(buffer);
kfree(envp);
- return;
+}
+
+static void kset_hotplug(const char *action, struct kset *kset,
+ struct kobject *kobj)
+{
+ struct hotplug_work *work;
+
+ if (!(work = kmalloc(sizeof(*work), GFP_KERNEL)))
+ return;
+
+ work->action = action;
+ if (!(work->kset = kset_get(kset))) {
+ kfree(work);
+ return;
+ }
+
+ if (!(work->kobj = kobject_get(kobj))) {
+ kset_put(kset);
+ kfree(work);
+ return;
+ }
+
+ INIT_WORK(&work->work, kset_hotplug_work, work);
+
+ if (keventd_up())
+ schedule_work(&work->work);
+ else
+ kset_hotplug_work(work);
}
void kobject_hotplug(const char *action, struct kobject *kobj)
_
On Fri, Apr 09, 2004 at 03:42:56PM -0500, Brian King wrote:
> Would you prefer a fix in call_usermodehelper itself? It could certainly
> be argued that calling call_usermodehelper with wait=0 should be allowed
> even when holding locks. Although, fixing it here is less obvious to me
> how to do because of the arguments to call_usermodehelper. I would imagine
> it would consist of creating a kernel_thread to preserve the caller's stack.
Yes, I think call_usermodehelper should be changed to create a new
kernel thread for every call. That would solve this problem, and any
future races that might happen. Care to work on that?
thanks,
greg k-h
Greg KH <[email protected]> wrote:
>
> On Fri, Apr 09, 2004 at 03:42:56PM -0500, Brian King wrote:
> > Would you prefer a fix in call_usermodehelper itself? It could certainly
> > be argued that calling call_usermodehelper with wait=0 should be allowed
> > even when holding locks. Although, fixing it here is less obvious to me
> > how to do because of the arguments to call_usermodehelper. I would imagine
> > it would consist of creating a kernel_thread to preserve the caller's stack.
>
> Yes, I think call_usermodehelper should be changed to create a new
> kernel thread for every call.
It does that already. But that thread is parented by keventd. This was
done to avoid all the various nasty things which can happen when you have a
kernel thread and a hotplug helper which are parented by a random userspace
process. All the crap which it might have inherited: uid? gid? signals?
nice? rtprio? rlimits? namespace?
The deadlock opportunity occurs during the call_usermodehelper() handoff to
keventd, which is synchronous.
2-3 years back I did have a call_usermodehelper() which was fully async.
It was pretty unpleasant because of the need to atomically allocate
arbitrary amounts of memory to hold the argv[] and envp[] arrays, to pass
them between a couple of threads and to then correctly free it all up
again.
On Fri, Apr 09, 2004 at 02:15:11PM -0700, Andrew Morton wrote:
> Greg KH <[email protected]> wrote:
> >
> > On Fri, Apr 09, 2004 at 03:42:56PM -0500, Brian King wrote:
> > > Would you prefer a fix in call_usermodehelper itself? It could certainly
> > > be argued that calling call_usermodehelper with wait=0 should be allowed
> > > even when holding locks. Although, fixing it here is less obvious to me
> > > how to do because of the arguments to call_usermodehelper. I would imagine
> > > it would consist of creating a kernel_thread to preserve the caller's stack.
> >
> > Yes, I think call_usermodehelper should be changed to create a new
> > kernel thread for every call.
>
> It does that already. But that thread is parented by keventd. This was
> done to avoid all the various nasty things which can happen when you have a
> kernel thread and a hotplug helper which are parented by a random userspace
> process. All the crap which it might have inherited: uid? gid? signals?
> nice? rtprio? rlimits? namespace?
Yeah, good point.
> The deadlock opportunity occurs during the call_usermodehelper() handoff to
> keventd, which is synchronous.
>
> 2-3 years back I did have a call_usermodehelper() which was fully async.
> It was pretty unpleasant because of the need to atomically allocate
> arbitrary amounts of memory to hold the argv[] and envp[] arrays, to pass
> them between a couple of threads and to then correctly free it all up
> again.
Ok, you've convinced me of the mess that would cause. So what should we
do to help fix this? Serialize call_usermodehelper()?
thanks,
greg k-h
Greg KH <[email protected]> wrote:
>
> > The deadlock opportunity occurs during the call_usermodehelper() handoff to
> > keventd, which is synchronous.
> >
> > 2-3 years back I did have a call_usermodehelper() which was fully async.
> > It was pretty unpleasant because of the need to atomically allocate
> > arbitrary amounts of memory to hold the argv[] and envp[] arrays, to pass
> > them between a couple of threads and to then correctly free it all up
> > again.
>
> Ok, you've convinced me of the mess that would cause. So what should we
> do to help fix this? Serialize call_usermodehelper()?
May as well bring back call_usermodehelper_async() I guess.
There are two patches here, and they are totally untested...
Add some generally-useful string replication functions which are required by
call_usermodehelper_async():
void *kzmalloc(size_t size, int gfp_flags);
kmalloc() then bzero().
char *kstrdup(char *p, int gfp_flags);
kmalloc() then strcpy()
char **kstrdup_vec(char **vec, int gfp_flags);
duplicate an argv[]-style string array
void kfree_strvec(char **vec);
free up the result of a previous kstrdup_vec()
---
25-akpm/drivers/md/dm-ioctl.c | 17 ++----------
25-akpm/include/linux/slab.h | 5 +++
25-akpm/mm/slab.c | 57 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 14 deletions(-)
diff -puN drivers/md/dm-ioctl.c~kstrdup-and-friends drivers/md/dm-ioctl.c
--- 25/drivers/md/dm-ioctl.c~kstrdup-and-friends 2004-04-10 12:50:59.324302648 -0700
+++ 25-akpm/drivers/md/dm-ioctl.c 2004-04-10 12:50:59.341300064 -0700
@@ -118,17 +118,6 @@ static struct hash_cell *__get_uuid_cell
return NULL;
}
-/*-----------------------------------------------------------------
- * Inserting, removing and renaming a device.
- *---------------------------------------------------------------*/
-static inline char *kstrdup(const char *str)
-{
- char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
- if (r)
- strcpy(r, str);
- return r;
-}
-
static struct hash_cell *alloc_cell(const char *name, const char *uuid,
struct mapped_device *md)
{
@@ -138,7 +127,7 @@ static struct hash_cell *alloc_cell(cons
if (!hc)
return NULL;
- hc->name = kstrdup(name);
+ hc->name = kstrdup(name, GFP_KERNEL);
if (!hc->name) {
kfree(hc);
return NULL;
@@ -148,7 +137,7 @@ static struct hash_cell *alloc_cell(cons
hc->uuid = NULL;
else {
- hc->uuid = kstrdup(uuid);
+ hc->uuid = kstrdup(uuid, GFP_KERNEL);
if (!hc->uuid) {
kfree(hc->name);
kfree(hc);
@@ -270,7 +259,7 @@ int dm_hash_rename(const char *old, cons
/*
* duplicate new.
*/
- new_name = kstrdup(new);
+ new_name = kstrdup(new, GFP_KERNEL);
if (!new_name)
return -ENOMEM;
diff -puN include/linux/slab.h~kstrdup-and-friends include/linux/slab.h
--- 25/include/linux/slab.h~kstrdup-and-friends 2004-04-10 12:50:59.326302344 -0700
+++ 25-akpm/include/linux/slab.h 2004-04-10 13:07:58.979291528 -0700
@@ -117,6 +117,11 @@ void ptrinfo(unsigned long addr);
extern atomic_t slab_reclaim_pages;
+void *kzmalloc(size_t size, int gfp_flags);
+char *kstrdup(const char *p, int gfp_flags);
+char **kstrdup_vec(char **vec, int gfp_flags);
+void kfree_strvec(char **vec);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff -puN mm/slab.c~kstrdup-and-friends mm/slab.c
--- 25/mm/slab.c~kstrdup-and-friends 2004-04-10 12:50:59.328302040 -0700
+++ 25-akpm/mm/slab.c 2004-04-10 13:08:05.602284680 -0700
@@ -3009,3 +3009,60 @@ void ptrinfo(unsigned long addr)
}
}
+
+void *kzmalloc(size_t size, int gfp_flags)
+{
+ void *ret = kmalloc(size, gfp_flags);
+ if (ret)
+ memset(ret, 0, size);
+ return ret;
+}
+EXPORT_SYMBOL(kzmalloc);
+
+char *kstrdup(const char *p, int gfp_flags)
+{
+ char *ret = kmalloc(strlen(p) + 1, gfp_flags);
+ if (ret)
+ strcpy(ret, p);
+ return ret;
+}
+EXPORT_SYMBOL(kstrdup);
+
+char **kstrdup_vec(char **vec, int gfp_flags)
+{
+ char **ret;
+ int nr_strings;
+ int i;
+
+ for (nr_strings = 0; vec[nr_strings]; nr_strings++)
+ ;
+ ret = kzmalloc((nr_strings + 1) * sizeof(*ret), gfp_flags);
+ if (ret == NULL)
+ goto enomem;
+ for (i = 0; i < nr_strings; i++) {
+ ret[i] = kstrdup(vec[i], gfp_flags);
+ if (ret[i] == NULL)
+ goto enomem;
+ }
+ ret[i] = NULL;
+ return ret;
+enomem:
+ kfree_strvec(ret);
+ return NULL;
+}
+EXPORT_SYMBOL(kstrdup_vec);
+
+void kfree_strvec(char **vec)
+{
+ char **p;
+
+ if (vec == NULL)
+ return;
+ p = vec;
+ while (*p) {
+ kfree(*p);
+ p++;
+ }
+ kfree(vec);
+}
+EXPORT_SYMBOL(kfree_strvec);
_
call_usermodehelper() has to synchronously wait until it has handed its local
variables off to keventd. This can lead to deadlocks when the caller holds
semaphores which keventd may also want.
To fix this we introduce call_usermodehelper_async(), which does not block on
a keventd response.
---
25-akpm/include/linux/kmod.h | 5 ++-
25-akpm/kernel/kmod.c | 69 +++++++++++++++++++++++++++++++++++++------
2 files changed, 64 insertions(+), 10 deletions(-)
diff -puN include/linux/kmod.h~call_usermodehelper_async include/linux/kmod.h
--- 25/include/linux/kmod.h~call_usermodehelper_async 2004-04-10 13:08:12.554227824 -0700
+++ 25-akpm/include/linux/kmod.h 2004-04-10 13:08:12.558227216 -0700
@@ -32,7 +32,10 @@ static inline int request_module(const c
#endif
#define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
-extern int call_usermodehelper(char *path, char *argv[], char *envp[], int wait);
+
+int call_usermodehelper(char *path, char **argv, char **envp, int wait);
+int call_usermodehelper_async(char *path, char **argv,
+ char **envp, int gfp_flags);
#ifdef CONFIG_HOTPLUG
extern char hotplug_path [];
diff -puN kernel/kmod.c~call_usermodehelper_async kernel/kmod.c
--- 25/kernel/kmod.c~call_usermodehelper_async 2004-04-10 13:08:12.555227672 -0700
+++ 25-akpm/kernel/kmod.c 2004-04-10 13:08:12.559227064 -0700
@@ -109,6 +109,7 @@ int request_module(const char *fmt, ...)
atomic_dec(&kmod_concurrent);
return ret;
}
+EXPORT_SYMBOL(request_module);
#endif /* CONFIG_KMOD */
#ifdef CONFIG_HOTPLUG
@@ -140,7 +141,9 @@ struct subprocess_info {
char **argv;
char **envp;
int wait;
+ int async;
int retval;
+ struct work_struct async_work;
};
/*
@@ -197,6 +200,16 @@ static int wait_for_helper(void *data)
return 0;
}
+static void destroy_subinfo(struct subprocess_info *sub_info)
+{
+ if (!sub_info)
+ return;
+ kfree_strvec(sub_info->argv);
+ kfree_strvec(sub_info->envp);
+ kfree(sub_info->path);
+ kfree(sub_info);
+}
+
/*
* This is run by keventd.
*/
@@ -215,11 +228,15 @@ static void __call_usermodehelper(void *
pid = kernel_thread(____call_usermodehelper, sub_info,
CLONE_VFORK | SIGCHLD);
- if (pid < 0) {
- sub_info->retval = pid;
- complete(sub_info->complete);
- } else if (!sub_info->wait)
- complete(sub_info->complete);
+ if (sub_info->async) {
+ destroy_subinfo(sub_info);
+ } else {
+ if (pid < 0) {
+ sub_info->retval = pid;
+ complete(sub_info->complete);
+ } else if (!sub_info->wait)
+ complete(sub_info->complete);
+ }
}
/**
@@ -265,10 +282,44 @@ int call_usermodehelper(char *path, char
out:
return sub_info.retval;
}
-
EXPORT_SYMBOL(call_usermodehelper);
-#ifdef CONFIG_KMOD
-EXPORT_SYMBOL(request_module);
-#endif
+/**
+ * call_usermodehelper_async - start a usermode application
+ *
+ * Like call_usermodehelper(), except it is fully asynchronous. Should only
+ * be used in extremis, such as when the caller unavoidably holds locks which
+ * keventd might block on.
+ */
+int call_usermodehelper_async(char *path, char **argv,
+ char **envp, int gfp_flags)
+{
+ struct subprocess_info *sub_info;
+
+ if (system_state != SYSTEM_RUNNING)
+ return -EBUSY;
+ if (path[0] == '\0')
+ goto out;
+ sub_info = kzmalloc(sizeof(*sub_info), gfp_flags);
+ if (!sub_info)
+ goto enomem;
+ sub_info->async = 1;
+ sub_info->path = kstrdup(path, gfp_flags);
+ if (!sub_info->path)
+ goto enomem;
+ sub_info->argv = kstrdup_vec(argv, gfp_flags);
+ if (!sub_info->argv)
+ goto enomem;
+ sub_info->envp = kstrdup_vec(envp, gfp_flags);
+ if (!sub_info->envp)
+ goto enomem;
+ INIT_WORK(&sub_info->async_work, __call_usermodehelper, sub_info);
+ schedule_work(&sub_info->async_work);
+out:
+ return 0;
+enomem:
+ destroy_subinfo(sub_info);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(call_usermodehelper_async);
_
On Sat, Apr 10, 2004 at 01:11:37PM -0700, Andrew Morton wrote:
>
> void *kzmalloc(size_t size, int gfp_flags);
>
> kmalloc() then bzero().
Sure, make the kernel-janitor's list even longer now :)
> char *kstrdup(char *p, int gfp_flags);
>
> kmalloc() then strcpy()
Haha, that's Rusty's "Is Linus Awake" patch he tries to send every 6
months or so...
greg k-h