2017-09-07 13:56:41

by Prateek Sood

Subject: [PATCH] cgroup/cpuset: remove circular dependency deadlock

Remove a circular dependency deadlock that can occur when a CPU hotplug
operation races with cgroup and cpuset updates triggered from userspace.

Process A => kthreadd => Process B => Process C => Process A

Process A
cpu_subsys_offline();
cpu_down();
_cpu_down();
percpu_down_write(&cpu_hotplug_lock); //held
cpuhp_invoke_callback();
workqueue_offline_cpu();
wq_update_unbound_numa();
kthread_create_on_node();
wake_up_process(); //wakeup kthreadd
flush_work();
wait_for_completion();

kthreadd
kthreadd();
kernel_thread();
do_fork();
copy_process();
percpu_down_read(&cgroup_threadgroup_rwsem);
__rwsem_down_read_failed_common(); //waiting

Process B
kernfs_fop_write();
cgroup_file_write();
cgroup_procs_write();
percpu_down_write(&cgroup_threadgroup_rwsem); //held
cgroup_attach_task();
cgroup_migrate();
cgroup_migrate_execute();
cpuset_can_attach();
mutex_lock(&cpuset_mutex); //waiting

Process C
kernfs_fop_write();
cgroup_file_write();
cpuset_write_resmask();
mutex_lock(&cpuset_mutex); //held
update_cpumask();
update_cpumasks_hier();
rebuild_sched_domains_locked();
get_online_cpus();
percpu_down_read(&cpu_hotplug_lock); //waiting

Signed-off-by: Prateek Sood <[email protected]>
---
kernel/cgroup/cpuset.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2f4039b..60dc0ac 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -816,16 +816,15 @@ static int generate_sched_domains(cpumask_var_t **domains,
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
*
- * Call with cpuset_mutex held. Takes get_online_cpus().
*/
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
{
struct sched_domain_attr *attr;
cpumask_var_t *doms;
int ndoms;

+ lockdep_assert_cpus_held();
lockdep_assert_held(&cpuset_mutex);
- get_online_cpus();

/*
* We have raced with CPU hotplug. Don't do anything to avoid
@@ -833,27 +832,27 @@ static void rebuild_sched_domains_locked(void)
* Anyways, hotplug work item will rebuild sched domains.
*/
if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
- goto out;
+ return;

/* Generate domain masks and attrs */
ndoms = generate_sched_domains(&doms, &attr);

/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);
-out:
- put_online_cpus();
}
#else /* !CONFIG_SMP */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
{
}
#endif /* CONFIG_SMP */

void rebuild_sched_domains(void)
{
+ get_online_cpus();
mutex_lock(&cpuset_mutex);
- rebuild_sched_domains_locked();
+ rebuild_sched_domains_cpuslocked();
mutex_unlock(&cpuset_mutex);
+ put_online_cpus();
}

/**
@@ -940,7 +939,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
rcu_read_unlock();

if (need_rebuild_sched_domains)
- rebuild_sched_domains_locked();
+ rebuild_sched_domains_cpuslocked();
}

/**
@@ -1273,7 +1272,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
cs->relax_domain_level = val;
if (!cpumask_empty(cs->cpus_allowed) &&
is_sched_load_balance(cs))
- rebuild_sched_domains_locked();
+ rebuild_sched_domains_cpuslocked();
}

return 0;
@@ -1306,7 +1305,6 @@ static void update_tasks_flags(struct cpuset *cs)
*
* Call with cpuset_mutex held.
*/
-
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on)
{
@@ -1339,7 +1337,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spin_unlock_irq(&callback_lock);

if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
- rebuild_sched_domains_locked();
+ rebuild_sched_domains_cpuslocked();

if (spread_flag_changed)
update_tasks_flags(cs);
@@ -1607,6 +1605,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
cpuset_filetype_t type = cft->private;
int retval = 0;

+ get_online_cpus();
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) {
retval = -ENODEV;
@@ -1644,6 +1643,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
}
out_unlock:
mutex_unlock(&cpuset_mutex);
+ put_online_cpus();
return retval;
}

@@ -1654,6 +1654,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
cpuset_filetype_t type = cft->private;
int retval = -ENODEV;

+ get_online_cpus();
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -1668,6 +1669,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
}
out_unlock:
mutex_unlock(&cpuset_mutex);
+ put_online_cpus();
return retval;
}

@@ -1706,6 +1708,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
kernfs_break_active_protection(of->kn);
flush_work(&cpuset_hotplug_work);

+ get_online_cpus();
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -1731,6 +1734,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_trial_cpuset(trialcs);
out_unlock:
mutex_unlock(&cpuset_mutex);
+ put_online_cpus();
kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css);
flush_workqueue(cpuset_migrate_mm_wq);
@@ -2031,13 +2035,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
/*
* If the cpuset being removed has its flag 'sched_load_balance'
* enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
+ * will call rebuild_sched_domains_cpuslocked().
*/

static void cpuset_css_offline(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);

+ get_online_cpus();
mutex_lock(&cpuset_mutex);

if (is_sched_load_balance(cs))
@@ -2047,6 +2052,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
clear_bit(CS_ONLINE, &cs->flags);

mutex_unlock(&cpuset_mutex);
+ put_online_cpus();
}

static void cpuset_css_free(struct cgroup_subsys_state *css)
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.,
is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


2017-09-07 17:45:21

by Peter Zijlstra

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On Thu, Sep 07, 2017 at 07:26:23PM +0530, Prateek Sood wrote:
> Remove a circular dependency deadlock that can occur when a CPU hotplug
> operation races with cgroup and cpuset updates triggered from userspace.
>
> Process A => kthreadd => Process B => Process C => Process A

> Process A
> cpu_subsys_offline();
> cpu_down();
> _cpu_down();
> percpu_down_write(&cpu_hotplug_lock); //held
> cpuhp_invoke_callback();
> workqueue_offline_cpu();
> wq_update_unbound_numa();
> kthread_create_on_node();
> wake_up_process(); //wakeup kthreadd

TJ, I'm puzzled, why would we need to spawn new threads to update NUMA
affinity when taking a CPU out? That doesn't make sense to me, we can
either shrink the affinity of an existing thread or completely kill off a
thread if the mask becomes empty. But why spawn a new thread?

> flush_work();
> wait_for_completion();
>
> kthreadd
> kthreadd();
> kernel_thread();
> do_fork();
> copy_process();
> percpu_down_read(&cgroup_threadgroup_rwsem);
> __rwsem_down_read_failed_common(); //waiting

So this will eventually do our complete() to make A go.


> Process B
> kernfs_fop_write();
> cgroup_file_write();
> cgroup_procs_write();
> percpu_down_write(&cgroup_threadgroup_rwsem); //held
> cgroup_attach_task();
> cgroup_migrate();
> cgroup_migrate_execute();
> cpuset_can_attach();
> mutex_lock(&cpuset_mutex); //waiting
>
> Process C
> kernfs_fop_write();
> cgroup_file_write();
> cpuset_write_resmask();
> mutex_lock(&cpuset_mutex); //held
> update_cpumask();
> update_cpumasks_hier();
> rebuild_sched_domains_locked();
> get_online_cpus();
> percpu_down_read(&cpu_hotplug_lock); //waiting


So the whole thing looks like:


A               B               C               D

L(hotplug)
                L(threadgroup)
                                L(cpuset)
                                                L(threadgroup)
WFC(c)
                L(cpuset)
                                L(hotplug)
                                                C(c)

Yes, inverting cpuset and hotplug would break that chain, but I'm still
wondering why workqueue needs to spawn threads on CPU down.

2017-09-07 17:51:19

by Peter Zijlstra

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On Thu, Sep 07, 2017 at 07:26:23PM +0530, Prateek Sood wrote:
> Remove a circular dependency deadlock that can occur when a CPU hotplug
> operation races with cgroup and cpuset updates triggered from userspace.

You've forgotten to mention your solution to the deadlock, namely
inverting cpuset_mutex and cpu_hotplug_lock.

> Signed-off-by: Prateek Sood <[email protected]>
> ---
> kernel/cgroup/cpuset.c | 32 +++++++++++++++++++-------------
> 1 file changed, 19 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index 2f4039b..60dc0ac 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -816,16 +816,15 @@ static int generate_sched_domains(cpumask_var_t **domains,
> * 'cpus' is removed, then call this routine to rebuild the
> * scheduler's dynamic sched domains.
> *
> - * Call with cpuset_mutex held. Takes get_online_cpus().
> */
> -static void rebuild_sched_domains_locked(void)
> +static void rebuild_sched_domains_cpuslocked(void)
> {
> struct sched_domain_attr *attr;
> cpumask_var_t *doms;
> int ndoms;
>
> + lockdep_assert_cpus_held();
> lockdep_assert_held(&cpuset_mutex);
> - get_online_cpus();
>
> /*
> * We have raced with CPU hotplug. Don't do anything to avoid
> @@ -833,27 +832,27 @@ static void rebuild_sched_domains_locked(void)
> * Anyways, hotplug work item will rebuild sched domains.
> */
> if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
> - goto out;
> + return;
>
> /* Generate domain masks and attrs */
> ndoms = generate_sched_domains(&doms, &attr);
>
> /* Have scheduler rebuild the domains */
> partition_sched_domains(ndoms, doms, attr);
> -out:
> - put_online_cpus();
> }
> #else /* !CONFIG_SMP */
> -static void rebuild_sched_domains_locked(void)
> +static void rebuild_sched_domains_cpuslocked(void)
> {
> }
> #endif /* CONFIG_SMP */
>
> void rebuild_sched_domains(void)
> {
> + get_online_cpus();
> mutex_lock(&cpuset_mutex);
> - rebuild_sched_domains_locked();
> + rebuild_sched_domains_cpuslocked();
> mutex_unlock(&cpuset_mutex);
> + put_online_cpus();
> }

But if you invert these locks, the need for cpuset_hotplug_workfn() goes
away, at least for the CPU part, and we can make it synchronous again.
Yay!!

Also, I think new code should use cpus_read_lock() instead of
get_online_cpus().
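
For reference, a minimal sketch of how that would look for the call site
above (this is only an illustration of the suggested API, not the final
patch; cpus_read_lock()/cpus_read_unlock() come from <linux/cpu.h>):

void rebuild_sched_domains(void)
{
        cpus_read_lock();               /* instead of get_online_cpus() */
        mutex_lock(&cpuset_mutex);
        rebuild_sched_domains_cpuslocked();
        mutex_unlock(&cpuset_mutex);
        cpus_read_unlock();             /* instead of put_online_cpus() */
}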

2017-09-08 02:13:30

by Prateek Sood

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On 09/07/2017 11:15 PM, Peter Zijlstra wrote:
> On Thu, Sep 07, 2017 at 07:26:23PM +0530, Prateek Sood wrote:
>> Remove a circular dependency deadlock that can occur when a CPU hotplug
>> operation races with cgroup and cpuset updates triggered from userspace.
>>
>> Process A => kthreadd => Process B => Process C => Process A
>
>> Process A
>> cpu_subsys_offline();
>> cpu_down();
>> _cpu_down();
>> percpu_down_write(&cpu_hotplug_lock); //held
>> cpuhp_invoke_callback();
>> workqueue_offline_cpu();
>> wq_update_unbound_numa();
>> kthread_create_on_node();
>> wake_up_process(); //wakeup kthreadd
>
> TJ, I'm puzzled, why would we need to spawn new threads to update NUMA
> affinity when taking a CPU out? That doesn't make sense to me, we can
> either shrink the affinity of an existing thread or completely kill off a
> thread if the mask becomes empty. But why spawn a new thread?
>
>> flush_work();
>> wait_for_completion();
>>

>
> Yes, inverting cpuset and hotplug would break that chain, but I'm still
> wondering why workqueue needs to spawn threads on CPU down.
>

Thanks for the comments, Peter.

You are right that no new thread is spawned to update NUMA
affinity when a CPU is taken out.

When a CPU is taken offline, an attempt is made to unbind the per-cpu
workers of the CPU going down. This is done by queuing the unbind work
on system_highpri_wq, which results in an attempt to create a bound
worker thread, as there is none available.

wait_for_completion() in flush_work() then waits for the unbinding of
the CPU going down to finish.

Process A
cpu_subsys_offline();
cpu_down();
_cpu_down();
percpu_down_write(&cpu_hotplug_lock); //held
cpuhp_invoke_callback();
workqueue_offline_cpu();
queue_work_on(system_highpri_wq);
__queue_work();
insert_work();
wake_up_worker(); //pool->nr_running = 0
flush_work();
wait_for_completion();


worker_thread();
need_more_worker(); // returns true
manage_workers();
maybe_create_worker();
create_worker();
kthread_create_on_node();
wake_up_process(kthreadd_task);



--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

2017-10-27 08:05:58

by Prateek Sood

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On 10/26/2017 07:35 PM, Waiman Long wrote:
> On 10/26/2017 07:52 AM, Prateek Sood wrote:
>> Remove a circular dependency deadlock that can occur when a CPU hotplug
>> operation races with cgroup and cpuset updates triggered from userspace.
>>
>> Process A => kthreadd => Process B => Process C => Process A
>>
>> Process A
>> cpu_subsys_offline();
>> cpu_down();
>> _cpu_down();
>> percpu_down_write(&cpu_hotplug_lock); //held
>> cpuhp_invoke_callback();
>> workqueue_offline_cpu();
>> wq_update_unbound_numa();
>> kthread_create_on_node();
>> wake_up_process(); //wakeup kthreadd
>> flush_work();
>> wait_for_completion();
>>
>> kthreadd
>> kthreadd();
>> kernel_thread();
>> do_fork();
>> copy_process();
>> percpu_down_read(&cgroup_threadgroup_rwsem);
>> __rwsem_down_read_failed_common(); //waiting
>>
>> Process B
>> kernfs_fop_write();
>> cgroup_file_write();
>> cgroup_procs_write();
>> percpu_down_write(&cgroup_threadgroup_rwsem); //held
>> cgroup_attach_task();
>> cgroup_migrate();
>> cgroup_migrate_execute();
>> cpuset_can_attach();
>> mutex_lock(&cpuset_mutex); //waiting
>>
>> Process C
>> kernfs_fop_write();
>> cgroup_file_write();
>> cpuset_write_resmask();
>> mutex_lock(&cpuset_mutex); //held
>> update_cpumask();
>> update_cpumasks_hier();
>> rebuild_sched_domains_locked();
>> get_online_cpus();
>> percpu_down_read(&cpu_hotplug_lock); //waiting
>>
>> Eliminate the deadlock by reversing the locking order of cpuset_mutex and
>> cpu_hotplug_lock.
>
> General comments:
>
> Please add a version number to your patch. I have seen multiple versions
> of this patch and have lost track of how many there are, as there is no
> version number information. In addition, there are changes beyond just
> swapping the lock order, and they are not documented in this change log.
> I would like to see you discuss those additional changes here as well.
Thanks for the comments, Longman. I will introduce patch versioning and update
the commit text to document the extra changes.

Explanation of the extra changes in this patch:
After inverting the locking sequence of cpu_hotplug_lock and cpuset_mutex,
the cpuset_hotplug_workfn() functionality can be performed synchronously from
the context doing the cpu hotplug. The extra changes in this patch remove the
queuing of cpuset_hotplug_workfn() as a work item for the cpu hotplug path. For
memory hotplug it still gets queued as a work item.
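
A rough sketch of the intended caller-side split (this assumes a
cpuset_hotplug(bool use_cpu_hp_lock) helper carrying the body of
cpuset_hotplug_workfn(); names and signatures here are illustrative,
not the final code):

/* CPU hotplug path: cpu_hotplug_lock is already held by the caller,
 * so handle the cpuset update synchronously and tell the helper not
 * to take the lock again.
 */
void cpuset_update_active_cpus(void)
{
        cpuset_hotplug(false);
}

/* Memory hotplug path: keep deferring to the work item as before. */
static int cpuset_track_online_nodes(struct notifier_block *self,
                                     unsigned long action, void *arg)
{
        schedule_work(&cpuset_hotplug_work);
        return NOTIFY_OK;
}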


This suggestion came from Peter.
Peter, could you please elaborate if I have missed anything?

>
>> void rebuild_sched_domains(void)
>> {
>> + cpus_read_lock();
>> mutex_lock(&cpuset_mutex);
>> - rebuild_sched_domains_locked();
>> + rebuild_sched_domains_cpuslocked();
>> mutex_unlock(&cpuset_mutex);
>> + cpus_read_unlock();
>> }
>
> I saw a lot of instances where cpus_read_lock() and mutex_lock() come
> together. Maybe some new lock/unlock helper functions may help.
OK, I will introduce a pair of helpers for taking and releasing
both locks together.
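
Something along these lines, perhaps (the helper names below are only
illustrative and open to bikeshedding):

static void cpuset_sched_change_begin(void)
{
        cpus_read_lock();
        mutex_lock(&cpuset_mutex);
}

static void cpuset_sched_change_end(void)
{
        mutex_unlock(&cpuset_mutex);
        cpus_read_unlock();
}

/* Example caller: */
void rebuild_sched_domains(void)
{
        cpuset_sched_change_begin();
        rebuild_sched_domains_cpuslocked();
        cpuset_sched_change_end();
}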

>
>> @@ -2356,25 +2354,29 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
>> }
>>
>> /* rebuild sched domains if cpus_allowed has changed */
>> - if (cpus_updated || force_rebuild) {
>> - force_rebuild = false;
>> - rebuild_sched_domains();
>> + if (cpus_updated) {
>> + if (use_cpu_hp_lock)
>> + rebuild_sched_domains();
>> + else {
>> + /* When called during cpu hotplug cpu_hotplug_lock
>> + * is held by the calling thread, not
>> + * not cpuhp_thread_fun
>> + */
>
> ??? The comment is not clear.

The following is the scenario described by the comment:

Process A
_cpu_down()
cpus_write_lock() //cpu_hotplug_lock held
cpuhp_kick_ap_work()
cpuhp_kick_ap()
wake_up_process() // wake up cpuhp_thread_fun
wait_for_ap_thread() //wait for hotplug thread to signal completion


cpuhp_thread_fun()
cpuhp_invoke_callback()
sched_cpu_deactivate()
cpuset_cpu_inactive()
cpuset_update_active_cpus()
cpuset_hotplug(false) // do not take cpu_hotplug_lock from the _cpu_down() path

I will update the comment in the next version of the patch to elaborate more.

>
> Cheers,
> Longman
>


--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project


2017-10-26 14:06:11

by Waiman Long

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On 10/26/2017 07:52 AM, Prateek Sood wrote:
> Remove a circular dependency deadlock that can occur when a CPU hotplug
> operation races with cgroup and cpuset updates triggered from userspace.
>
> Process A => kthreadd => Process B => Process C => Process A
>
> Process A
> cpu_subsys_offline();
> cpu_down();
> _cpu_down();
> percpu_down_write(&cpu_hotplug_lock); //held
> cpuhp_invoke_callback();
> workqueue_offline_cpu();
> wq_update_unbound_numa();
> kthread_create_on_node();
> wake_up_process(); //wakeup kthreadd
> flush_work();
> wait_for_completion();
>
> kthreadd
> kthreadd();
> kernel_thread();
> do_fork();
> copy_process();
> percpu_down_read(&cgroup_threadgroup_rwsem);
> __rwsem_down_read_failed_common(); //waiting
>
> Process B
> kernfs_fop_write();
> cgroup_file_write();
> cgroup_procs_write();
> percpu_down_write(&cgroup_threadgroup_rwsem); //held
> cgroup_attach_task();
> cgroup_migrate();
> cgroup_migrate_execute();
> cpuset_can_attach();
> mutex_lock(&cpuset_mutex); //waiting
>
> Process C
> kernfs_fop_write();
> cgroup_file_write();
> cpuset_write_resmask();
> mutex_lock(&cpuset_mutex); //held
> update_cpumask();
> update_cpumasks_hier();
> rebuild_sched_domains_locked();
> get_online_cpus();
> percpu_down_read(&cpu_hotplug_lock); //waiting
>
> Eliminate the deadlock by reversing the locking order of cpuset_mutex and
> cpu_hotplug_lock.

General comments:

Please add a version number to your patch. I have seen multiple versions
of this patch and have lost track of how many there are, as there is no
version number information. In addition, there are changes beyond just
swapping the lock order, and they are not documented in this change log.
I would like to see you discuss those additional changes here as well.

> void rebuild_sched_domains(void)
> {
> + cpus_read_lock();
> mutex_lock(&cpuset_mutex);
> - rebuild_sched_domains_locked();
> + rebuild_sched_domains_cpuslocked();
> mutex_unlock(&cpuset_mutex);
> + cpus_read_unlock();
> }

I saw a lot of instances where cpus_read_lock() and mutex_lock() come
together. Maybe some new lock/unlock helper functions may help.

> @@ -2356,25 +2354,29 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
> }
>
> /* rebuild sched domains if cpus_allowed has changed */
> - if (cpus_updated || force_rebuild) {
> - force_rebuild = false;
> - rebuild_sched_domains();
> + if (cpus_updated) {
> + if (use_cpu_hp_lock)
> + rebuild_sched_domains();
> + else {
> + /* When called during cpu hotplug cpu_hotplug_lock
> + * is held by the calling thread, not
> + * not cpuhp_thread_fun
> + */

??? The comment is not clear.

Cheers,
Longman



2017-10-09 13:28:34

by Prateek Sood

Subject: Re: [PATCH] cgroup/cpuset: remove circular dependency deadlock

On 09/07/2017 11:21 PM, Peter Zijlstra wrote:
> On Thu, Sep 07, 2017 at 07:26:23PM +0530, Prateek Sood wrote:
>> Remove a circular dependency deadlock that can occur when a CPU hotplug
>> operation races with cgroup and cpuset updates triggered from userspace.
>
> You've forgotten to mention your solution to the deadlock, namely
> inverting cpuset_mutex and cpu_hotplug_lock.
>
>> Signed-off-by: Prateek Sood <[email protected]>
>> ---
>> kernel/cgroup/cpuset.c | 32 +++++++++++++++++++-------------
>> 1 file changed, 19 insertions(+), 13 deletions(-)
>>
>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>> index 2f4039b..60dc0ac 100644
>> --- a/kernel/cgroup/cpuset.c
>> +++ b/kernel/cgroup/cpuset.c
>> @@ -816,16 +816,15 @@ static int generate_sched_domains(cpumask_var_t **domains,
>> * 'cpus' is removed, then call this routine to rebuild the
>> * scheduler's dynamic sched domains.
>> *
>> - * Call with cpuset_mutex held. Takes get_online_cpus().
>> */
>> -static void rebuild_sched_domains_locked(void)
>> +static void rebuild_sched_domains_cpuslocked(void)
>> {
>> struct sched_domain_attr *attr;
>> cpumask_var_t *doms;
>> int ndoms;
>>
>> + lockdep_assert_cpus_held();
>> lockdep_assert_held(&cpuset_mutex);
>> - get_online_cpus();
>>
>> /*
>> * We have raced with CPU hotplug. Don't do anything to avoid
>> @@ -833,27 +832,27 @@ static void rebuild_sched_domains_locked(void)
>> * Anyways, hotplug work item will rebuild sched domains.
>> */
>> if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
>> - goto out;
>> + return;
>>
>> /* Generate domain masks and attrs */
>> ndoms = generate_sched_domains(&doms, &attr);
>>
>> /* Have scheduler rebuild the domains */
>> partition_sched_domains(ndoms, doms, attr);
>> -out:
>> - put_online_cpus();
>> }
>> #else /* !CONFIG_SMP */
>> -static void rebuild_sched_domains_locked(void)
>> +static void rebuild_sched_domains_cpuslocked(void)
>> {
>> }
>> #endif /* CONFIG_SMP */
>>
>> void rebuild_sched_domains(void)
>> {
>> + get_online_cpus();
>> mutex_lock(&cpuset_mutex);
>> - rebuild_sched_domains_locked();
>> + rebuild_sched_domains_cpuslocked();
>> mutex_unlock(&cpuset_mutex);
>> + put_online_cpus();
>> }
>
> But if you invert these locks, the need for cpuset_hotplug_workfn() goes
> away, at least for the CPU part, and we can make it synchronous again.
> Yay!!
>
> Also, I think new code should use cpus_read_lock() instead of
> get_online_cpus().
>

Thanks for the review comments, Peter.
For the patch related to the circular deadlock, I will send an updated version.

The hotplug callback that leads to cpuset_hotplug_workfn() is:
[CPUHP_AP_ACTIVE] = {
.name = "sched:active",
.startup.single = sched_cpu_activate,
.teardown.single = sched_cpu_deactivate,
},

If we make cpuset_hotplug_workfn() synchronous, a deadlock might happen:
_cpu_down()
cpus_write_lock() //held
cpuhp_kick_ap_work()
cpuhp_kick_ap()
__cpuhp_kick_ap()
wake_up_process() //cpuhp_thread_fun
wait_for_ap_thread() //wait for complete from cpuhp_thread_fun()

cpuhp_thread_fun()
cpuhp_invoke_callback()
sched_cpu_deactivate()
cpuset_cpu_inactive()
cpuset_update_active_cpus()
cpuset_hotplug_workfn()
rebuild_sched_domains()
cpus_read_lock() //waiting as acquired in _cpu_down()


--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project
