The cgroup_event_wake() function is called with the wait queue head
locked and it takes cgrp->event_list_lock. However, in cgroup_rmdir()
remove_wait_queue(), which takes the wait queue head lock, was being
called while holding cgrp->event_list_lock. This inverts the lock
ordering and can deadlock. Correct it by moving the events onto a
temporary list under event_list_lock, then removing them from the wait
queue after the lock has been dropped.
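The inverted ordering, roughly (a simplified sketch of the two call
chains, not the exact code):

  /* Waker side: __wake_up() holds wqh->lock when it invokes the
   * wait queue callback, which then takes event_list_lock:
   */
  cgroup_event_wake()                    /* runs under wqh->lock */
      spin_lock(&cgrp->event_list_lock);
      ...
      spin_unlock(&cgrp->event_list_lock);

  /* rmdir side (before this patch): opposite order: */
  cgroup_destroy_locked()
      spin_lock(&cgrp->event_list_lock);
      remove_wait_queue(event->wqh, &event->wait); /* takes wqh->lock */
      spin_unlock(&cgrp->event_list_lock);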
Signed-off-by: Greg Thelen <[email protected]>
Signed-off-by: Aaron Durbin <[email protected]>
---
kernel/cgroup.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ece60d4..c79a969 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4276,6 +4276,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
DEFINE_WAIT(wait);
struct cgroup_event *event, *tmp;
struct cgroup_subsys *ss;
+ struct list_head tmp_list;
lockdep_assert_held(&d->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
@@ -4330,16 +4331,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
- * directory to avoid race between userspace and kernelspace
+ * directory to avoid race between userspace and kernelspace. Use
+ * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
+ * cgroup_event_wake() is called with the wait queue head locked,
+ * remove_wait_queue() cannot be called while holding event_list_lock.
*/
spin_lock(&cgrp->event_list_lock);
- list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
+ list_replace_init(&cgrp->event_list, &tmp_list);
+ spin_unlock(&cgrp->event_list_lock);
+ list_for_each_entry_safe(event, tmp, &tmp_list, list) {
list_del(&event->list);
remove_wait_queue(event->wqh, &event->wait);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
- spin_unlock(&cgrp->event_list_lock);
return 0;
}
--
1.7.7.3
Hello, Greg.
On Wed, Nov 28, 2012 at 12:15:42PM -0800, Greg Thelen wrote:
> @@ -4276,6 +4276,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
> DEFINE_WAIT(wait);
> struct cgroup_event *event, *tmp;
> struct cgroup_subsys *ss;
> + struct list_head tmp_list;
LIST_HEAD(tmp_list);
>
> lockdep_assert_held(&d->d_inode->i_mutex);
> lockdep_assert_held(&cgroup_mutex);
> @@ -4330,16 +4331,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
> /*
> * Unregister events and notify userspace.
> * Notify userspace about cgroup removing only after rmdir of cgroup
> - * directory to avoid race between userspace and kernelspace
> + * directory to avoid race between userspace and kernelspace. Use
> + * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
> + * cgroup_event_wake() is called with the wait queue head locked,
> + * remove_wait_queue() cannot be called while holding event_list_lock.
> */
> spin_lock(&cgrp->event_list_lock);
> - list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
> + list_replace_init(&cgrp->event_list, &tmp_list);
list_splice_init();
would be more conventional, I think.
> + spin_unlock(&cgrp->event_list_lock);
> + list_for_each_entry_safe(event, tmp, &tmp_list, list) {
> list_del(&event->list);
Maybe convert this to list_del_init() while at it?
> remove_wait_queue(event->wqh, &event->wait);
> eventfd_signal(event->eventfd, 1);
> schedule_work(&event->remove);
> }
> - spin_unlock(&cgrp->event_list_lock);
Thanks.
--
tejun
On Wed, Nov 28 2012, Tejun Heo wrote:
> Hello, Greg.
>
> On Wed, Nov 28, 2012 at 12:15:42PM -0800, Greg Thelen wrote:
>> @@ -4276,6 +4276,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
>> DEFINE_WAIT(wait);
>> struct cgroup_event *event, *tmp;
>> struct cgroup_subsys *ss;
>> + struct list_head tmp_list;
>
> LIST_HEAD(tmp_list);
>
>>
>> lockdep_assert_held(&d->d_inode->i_mutex);
>> lockdep_assert_held(&cgroup_mutex);
>> @@ -4330,16 +4331,20 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
>> /*
>> * Unregister events and notify userspace.
>> * Notify userspace about cgroup removing only after rmdir of cgroup
>> - * directory to avoid race between userspace and kernelspace
>> + * directory to avoid race between userspace and kernelspace. Use
>> + * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
>> + * cgroup_event_wake() is called with the wait queue head locked,
>> + * remove_wait_queue() cannot be called while holding event_list_lock.
>> */
>> spin_lock(&cgrp->event_list_lock);
>> - list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
>> + list_replace_init(&cgrp->event_list, &tmp_list);
>
> list_splice_init();
>
> would be more conventional, I think.
I used list_replace_init() because it avoided unnecessary init of
tmp_list. But I have no objection to list_splice_init(). I'll repost
with list_splice_init().
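For completeness, the two differ only in whether the destination head
must already be initialized; roughly (illustrative sketch, not the
exact patch):

  /* list_replace_init() overwrites the destination, so tmp_list
   * does not need to be initialized first:
   */
  struct list_head tmp_list;
  list_replace_init(&cgrp->event_list, &tmp_list);

  /* list_splice_init() splices onto an existing head, so tmp_list
   * must start out empty, e.g. via LIST_HEAD():
   */
  LIST_HEAD(tmp_list);
  list_splice_init(&cgrp->event_list, &tmp_list);

Either way, cgrp->event_list is left reinitialized (empty).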
>> + spin_unlock(&cgrp->event_list_lock);
>> + list_for_each_entry_safe(event, tmp, &tmp_list, list) {
>> list_del(&event->list);
>
> Maybe convert this to list_del_init() while at it?
I assume this isn't technically required, but it is defensive. Seems
like we should also convert the list_del(&event->list) in
cgroup_event_wake() for consistency. I'll send these in a separate
patch.
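For reference, the difference, roughly (illustrative sketch):

  /* list_del() poisons the entry's pointers (LIST_POISON1/2), so
   * the node must not be used as a list entry afterwards:
   */
  list_del(&event->list);

  /* list_del_init() leaves the entry self-linked, so a later
   * list_empty(&event->list) check stays safe:
   */
  list_del_init(&event->list);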
>> remove_wait_queue(event->wqh, &event->wait);
>> eventfd_signal(event->eventfd, 1);
>> schedule_work(&event->remove);
>> }
>> - spin_unlock(&cgrp->event_list_lock);
>
> Thanks.