Since recent chanegs to ondemand and conservative governor, there have been
multiple reports of lockdep issues in cpufreq. Patch series takes care of
these problems.
This is the next attempt following the one here, which was not a complete fix.
http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
I am currently running some stress tests to make sure there are no issues with
these patches. But, wanted to send them out for review/comments/testing
before I head out for the long weekend.
If this patchset seems sane, the first patch in the patchset should also get
into 30.stable.
--
(Can you give this patch a try and review my locking fixes ? I'm
uncomfortable modifying cpufreq locking before getting this fix correct
in the first place)
OK, I've tried to clean it up the best I could, but please test this with
concurrent cpu hotplug and cpufreq add/remove in loops. I'm sure we will make
other interesting findings.
This is step one of fixing the overall cpufreq locking dependency mess.
Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: "Pallipadi, Venkatesh" <[email protected]>
CC: Dave Jones <[email protected]>,
CC: Ingo Molnar <[email protected]>
CC: "Rafael J. Wysocki" <[email protected]>
CC: Dave Young <[email protected]>
CC: Pekka Enberg <[email protected]>
CC: Thomas Renninger <[email protected]>
---
drivers/cpufreq/cpufreq.c | 65 ++++++++++++++++++++++++++++------------------
1 file changed, 40 insertions(+), 25 deletions(-)
Index: linux-2.6-lttng/drivers/cpufreq/cpufreq.c
===================================================================
--- linux-2.6-lttng.orig/drivers/cpufreq/cpufreq.c 2009-07-02 20:47:19.000000000 -0400
+++ linux-2.6-lttng/drivers/cpufreq/cpufreq.c 2009-07-02 21:52:56.000000000 -0400
@@ -763,6 +763,10 @@ static struct kobj_type ktype_cpufreq =
* cpufreq_add_dev - add a CPU device
*
* Adds the cpufreq interface for a CPU device.
+ *
+ * The Oracle says: try running cpufreq registration/unregistration concurrently
+ * with with cpu hotplugging and all hell will break loose. Tried to clean this
+ * mess up, but more thorough testing is needed. - Mathieu
*/
static int cpufreq_add_dev(struct sys_device *sys_dev)
{
@@ -806,15 +810,12 @@ static int cpufreq_add_dev(struct sys_de
goto nomem_out;
}
if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) {
- kfree(policy);
ret = -ENOMEM;
- goto nomem_out;
+ goto err_free_policy;
}
if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
- free_cpumask_var(policy->cpus);
- kfree(policy);
ret = -ENOMEM;
- goto nomem_out;
+ goto err_free_cpumask;
}
policy->cpu = cpu;
@@ -822,7 +823,8 @@ static int cpufreq_add_dev(struct sys_de
/* Initially set CPU itself as the policy_cpu */
per_cpu(policy_cpu, cpu) = cpu;
- lock_policy_rwsem_write(cpu);
+ ret = (lock_policy_rwsem_write(cpu) < 0);
+ WARN_ON(ret);
init_completion(&policy->kobj_unregister);
INIT_WORK(&policy->update, handle_update);
@@ -835,7 +837,7 @@ static int cpufreq_add_dev(struct sys_de
ret = cpufreq_driver->init(policy);
if (ret) {
dprintk("initialization failed\n");
- goto err_out;
+ goto err_unlock_policy;
}
policy->user_policy.min = policy->min;
policy->user_policy.max = policy->max;
@@ -860,15 +862,21 @@ static int cpufreq_add_dev(struct sys_de
/* Check for existing affected CPUs.
* They may not be aware of it due to CPU Hotplug.
*/
- managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */
+ managed_policy = cpufreq_cpu_get(j);
if (unlikely(managed_policy)) {
/* Set proper policy_cpu */
unlock_policy_rwsem_write(cpu);
per_cpu(policy_cpu, cpu) = managed_policy->cpu;
- if (lock_policy_rwsem_write(cpu) < 0)
- goto err_out_driver_exit;
+ if (lock_policy_rwsem_write(cpu) < 0) {
+ /* Should not go through policy unlock path */
+ if (cpufreq_driver->exit)
+ cpufreq_driver->exit(policy);
+ ret = -EBUSY;
+ cpufreq_cpu_put(managed_policy);
+ goto err_free_cpumask;
+ }
spin_lock_irqsave(&cpufreq_driver_lock, flags);
cpumask_copy(managed_policy->cpus, policy->cpus);
@@ -879,12 +887,14 @@ static int cpufreq_add_dev(struct sys_de
ret = sysfs_create_link(&sys_dev->kobj,
&managed_policy->kobj,
"cpufreq");
- if (ret)
- goto err_out_driver_exit;
-
- cpufreq_debug_enable_ratelimit();
- ret = 0;
- goto err_out_driver_exit; /* call driver->exit() */
+ if (!ret)
+ cpufreq_cpu_put(managed_policy);
+ /*
+ * Success. We only needed to be added to the mask.
+ * Call driver->exit() because only the cpu parent of
+ * the kobj needed to call init().
+ */
+ goto out_driver_exit; /* call driver->exit() */
}
}
#endif
@@ -894,25 +904,25 @@ static int cpufreq_add_dev(struct sys_de
ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
"cpufreq");
if (ret)
- goto err_out_driver_exit;
+ goto out_driver_exit;
/* set up files for this cpu device */
drv_attr = cpufreq_driver->attr;
while ((drv_attr) && (*drv_attr)) {
ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
if (ret)
- goto err_out_driver_exit;
+ goto err_out_kobj_put;
drv_attr++;
}
if (cpufreq_driver->get) {
ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
if (ret)
- goto err_out_driver_exit;
+ goto err_out_kobj_put;
}
if (cpufreq_driver->target) {
ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
if (ret)
- goto err_out_driver_exit;
+ goto err_out_kobj_put;
}
spin_lock_irqsave(&cpufreq_driver_lock, flags);
@@ -930,12 +940,14 @@ static int cpufreq_add_dev(struct sys_de
continue;
dprintk("CPU %u already managed, adding link\n", j);
- cpufreq_cpu_get(cpu);
+ managed_policy = cpufreq_cpu_get(cpu);
cpu_sys_dev = get_cpu_sysdev(j);
ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
"cpufreq");
- if (ret)
+ if (ret) {
+ cpufreq_cpu_put(managed_policy);
goto err_out_unregister;
+ }
}
policy->governor = NULL; /* to assure that the starting sequence is
@@ -967,17 +979,20 @@ err_out_unregister:
per_cpu(cpufreq_cpu_data, j) = NULL;
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+err_out_kobj_put:
kobject_put(&policy->kobj);
wait_for_completion(&policy->kobj_unregister);
-err_out_driver_exit:
+out_driver_exit:
if (cpufreq_driver->exit)
cpufreq_driver->exit(policy);
-err_out:
+err_unlock_policy:
unlock_policy_rwsem_write(cpu);
+err_free_cpumask:
+ free_cpumask_var(policy->cpus);
+err_free_policy:
kfree(policy);
-
nomem_out:
module_put(cpufreq_driver->owner);
module_out:
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
* [email protected] <[email protected]> wrote:
> Since recent chanegs to ondemand and conservative governor, there
> have been multiple reports of lockdep issues in cpufreq. Patch
> series takes care of these problems.
>
> This is the next attempt following the one here, which was not a
> complete fix.
> http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
>
> I am currently running some stress tests to make sure there are no
> issues with these patches. But, wanted to send them out for
> review/comments/testing before I head out for the long weekend.
>
> If this patchset seems sane, the first patch in the patchset
> should also get into 30.stable.
Btw., FYI, because my test-systems were frequently triggering those
bugs, i kept testing the following series from you and Mathieu in
-tip:
ecf8b04: cpufreq: Define dbs_mutex purpose and cleanup its usage conservative gov
b08c597: cpufreq: Define dbs_mutex purpose and cleanup its usage
0807e30: cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call (second call site)
So that fix-series, while probably not complete (given that you sent
a v2 series), worked well in practice and gets my:
Tested-by: Ingo Molnar <[email protected]>
Is the delta between this (tested) series and your v2 version
significant? If not it might make sense to shape it as a delta patch
to the v1 series, if that looks clean enough - to preserve testing
results.
Ingo
* Ingo Molnar ([email protected]) wrote:
>
> * [email protected] <[email protected]> wrote:
>
> > Since recent chanegs to ondemand and conservative governor, there
> > have been multiple reports of lockdep issues in cpufreq. Patch
> > series takes care of these problems.
> >
> > This is the next attempt following the one here, which was not a
> > complete fix.
> > http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
> >
> > I am currently running some stress tests to make sure there are no
> > issues with these patches. But, wanted to send them out for
> > review/comments/testing before I head out for the long weekend.
> >
> > If this patchset seems sane, the first patch in the patchset
> > should also get into 30.stable.
>
> Btw., FYI, because my test-systems were frequently triggering those
> bugs, i kept testing the following series from you and Mathieu in
> -tip:
>
> ecf8b04: cpufreq: Define dbs_mutex purpose and cleanup its usage conservative gov
> b08c597: cpufreq: Define dbs_mutex purpose and cleanup its usage
> 0807e30: cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call (second call site)
>
> So that fix-series, while probably not complete (given that you sent
> a v2 series), worked well in practice and gets my:
>
> Tested-by: Ingo Molnar <[email protected]>
>
> Is the delta between this (tested) series and your v2 version
> significant? If not it might make sense to shape it as a delta patch
> to the v1 series, if that looks clean enough - to preserve testing
> results.
The delta is very significant. The purpose of each lock changes quite a
bit. I'm preparing a patch serie that should just fix the problem
without significant locking semantic modification.
(not that I have time to do this, but I end up spending more time
looking at the proposed solutions than doing it..) ;)
Mathieu
>
> Ingo
>
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
>-----Original Message-----
>From: Ingo Molnar [mailto:[email protected]]
>Sent: Thursday, July 02, 2009 11:54 PM
>To: Pallipadi, Venkatesh
>Cc: Dave Jones; [email protected];
>[email protected]; [email protected];
>Rafael J. Wysocki; Dave Young; Pekka Enberg; Mathieu
>Desnoyers; Thomas Renninger
>Subject: Re: [patch 0/4] Take care of cpufreq lockdep issues (take 2)
>
>
>* [email protected] <[email protected]> wrote:
>
>> Since recent chanegs to ondemand and conservative governor, there
>> have been multiple reports of lockdep issues in cpufreq. Patch
>> series takes care of these problems.
>>
>> This is the next attempt following the one here, which was not a
>> complete fix.
>> http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
>>
>> I am currently running some stress tests to make sure there are no
>> issues with these patches. But, wanted to send them out for
>> review/comments/testing before I head out for the long weekend.
>>
>> If this patchset seems sane, the first patch in the patchset
>> should also get into 30.stable.
>
>Btw., FYI, because my test-systems were frequently triggering those
>bugs, i kept testing the following series from you and Mathieu in
>-tip:
>
> ecf8b04: cpufreq: Define dbs_mutex purpose and cleanup its
>usage conservative gov
> b08c597: cpufreq: Define dbs_mutex purpose and cleanup its usage
> 0807e30: cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP
>call (second call site)
>
>So that fix-series, while probably not complete (given that you sent
>a v2 series), worked well in practice and gets my:
>
> Tested-by: Ingo Molnar <[email protected]>
>
>Is the delta between this (tested) series and your v2 version
>significant? If not it might make sense to shape it as a delta patch
>to the v1 series, if that looks clean enough - to preserve testing
>results.
>
Thanks for testing. That earlier version even though it took care
of lockdep complaints, did not address all the race conditions properly.
The delta is significant as I had to change the approach compared
to first patchset. So, diff will not be very clean.
Thanks,
Venki-
* Pallipadi, Venkatesh <[email protected]> wrote:
>
>
> >-----Original Message-----
> >From: Ingo Molnar [mailto:[email protected]]
> >Sent: Thursday, July 02, 2009 11:54 PM
> >To: Pallipadi, Venkatesh
> >Cc: Dave Jones; [email protected];
> >[email protected]; [email protected];
> >Rafael J. Wysocki; Dave Young; Pekka Enberg; Mathieu
> >Desnoyers; Thomas Renninger
> >Subject: Re: [patch 0/4] Take care of cpufreq lockdep issues (take 2)
> >
> >
> >* [email protected] <[email protected]> wrote:
> >
> >> Since recent chanegs to ondemand and conservative governor, there
> >> have been multiple reports of lockdep issues in cpufreq. Patch
> >> series takes care of these problems.
> >>
> >> This is the next attempt following the one here, which was not a
> >> complete fix.
> >> http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
> >>
> >> I am currently running some stress tests to make sure there are no
> >> issues with these patches. But, wanted to send them out for
> >> review/comments/testing before I head out for the long weekend.
> >>
> >> If this patchset seems sane, the first patch in the patchset
> >> should also get into 30.stable.
> >
> >Btw., FYI, because my test-systems were frequently triggering those
> >bugs, i kept testing the following series from you and Mathieu in
> >-tip:
> >
> > ecf8b04: cpufreq: Define dbs_mutex purpose and cleanup its
> >usage conservative gov
> > b08c597: cpufreq: Define dbs_mutex purpose and cleanup its usage
> > 0807e30: cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP
> >call (second call site)
> >
> >So that fix-series, while probably not complete (given that you sent
> >a v2 series), worked well in practice and gets my:
> >
> > Tested-by: Ingo Molnar <[email protected]>
> >
> >Is the delta between this (tested) series and your v2 version
> >significant? If not it might make sense to shape it as a delta patch
> >to the v1 series, if that looks clean enough - to preserve testing
> >results.
> >
>
> Thanks for testing. That earlier version even though it took care
> of lockdep complaints, did not address all the race conditions
> properly. The delta is significant as I had to change the approach
> compared to first patchset. So, diff will not be very clean.
Fair enough - these cases are when it makes sense to do a clean
rebase.
Ingo
On Thu, 2009-07-02 at 17:08 -0700, Pallipadi, Venkatesh wrote:
> Since recent chanegs to ondemand and conservative governor, there have been
> multiple reports of lockdep issues in cpufreq. Patch series takes care of
> these problems.
>
> This is the next attempt following the one here, which was not a complete fix.
> http://lkml.indiana.edu/hypermail/linux/kernel/0906.3/01073.html
>
> I am currently running some stress tests to make sure there are no issues with
> these patches. But, wanted to send them out for review/comments/testing
> before I head out for the long weekend.
>
> If this patchset seems sane, the first patch in the patchset should also get
> into 30.stable.
>
Just an update. I ran some stress test on few of my test machines over
the long weekend and havent seen any issues with this patchset.
Thanks,
Venki