2009-06-11 23:00:52

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 3/6] cpumask: avoid playing with cpus_allowed in powernow-k8.c

From: Rusty Russell <[email protected]>

It's generally a very bad idea to mug some process's cpumask: it could
legitimately and reasonably be changed by root, which could break us
(if done before our code) or them (if we restore the wrong value).

I did not replace powernowk8_target; it needs fixing too, but it grabs a
mutex (so smp_call_function_single cannot be used there), and Mark points
out it can be called multiple times per second, so work_on_cpu is too heavy.

Signed-off-by: Rusty Russell <[email protected]>
To: [email protected]
To: [email protected]
Cc: [email protected]
---
arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 156 +++++++++++++++---------------
1 file changed, 82 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -510,41 +510,34 @@ static int core_voltage_post_transition(
return 0;
}

-static int check_supported_cpu(unsigned int cpu)
+static void check_supported_cpu(void *_rc)
{
- cpumask_t oldmask;
u32 eax, ebx, ecx, edx;
- unsigned int rc = 0;
+ int *rc = _rc;

- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
- if (smp_processor_id() != cpu) {
- printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
- goto out;
- }
+ *rc = -ENODEV;

if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
- goto out;
+ return;

eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
((eax & CPUID_XFAM) < CPUID_XFAM_10H))
- goto out;
+ return;

if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
printk(KERN_INFO PFX
"Processor cpuid %x not supported\n", eax);
- goto out;
+ return;
}

eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
printk(KERN_INFO PFX
"No frequency change capabilities detected\n");
- goto out;
+ return;
}

cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -552,21 +545,17 @@ static int check_supported_cpu(unsigned
!= P_STATE_TRANSITION_CAPABLE) {
printk(KERN_INFO PFX
"Power state transitions not supported\n");
- goto out;
+ return;
}
} else { /* must be a HW Pstate capable processor */
cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
cpu_family = CPU_HW_PSTATE;
else
- goto out;
+ return;
}

- rc = 1;
-
-out:
- set_cpus_allowed_ptr(current, &oldmask);
- return rc;
+ *rc = 0;
}

static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -1235,6 +1230,32 @@ static int powernowk8_verify(struct cpuf
return cpufreq_frequency_table_verify(pol, data->powernow_table);
}

+struct init_on_cpu {
+ struct powernow_k8_data *data;
+ int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+ struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+ if (pending_bit_stuck()) {
+ printk(KERN_ERR PFX "failing init, change pending bit set\n");
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (cpu_family == CPU_OPTERON)
+ fidvid_msr_init();
+
+ init_on_cpu->rc = 0;
+}
+
static const char ACPI_PSS_BIOS_BUG_MSG[] =
KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
@@ -1243,13 +1264,14 @@ static const char ACPI_PSS_BIOS_BUG_MSG[
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
- cpumask_t oldmask;
+ struct init_on_cpu init_on_cpu;
int rc;

if (!cpu_online(pol->cpu))
return -ENODEV;

- if (!check_supported_cpu(pol->cpu))
+ smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+ if (rc)
return -ENODEV;

data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1289,27 +1311,12 @@ static int __cpuinit powernowk8_cpu_init
pol->cpuinfo.transition_latency = get_transition_latency(data);

/* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
-
- if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
- goto err_out_unmask;
- }
-
- if (pending_bit_stuck()) {
- printk(KERN_ERR PFX "failing init, change pending bit set\n");
- goto err_out_unmask;
- }
-
- if (query_current_values_with_pending_wait(data))
- goto err_out_unmask;
-
- if (cpu_family == CPU_OPTERON)
- fidvid_msr_init();
-
- /* run on any CPU again */
- set_cpus_allowed_ptr(current, &oldmask);
+ init_on_cpu.data = data;
+ smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
+ &init_on_cpu, 1);
+ rc = init_on_cpu.rc;
+ if (rc != 0)
+ goto err_out_exit_acpi;

if (cpu_family == CPU_HW_PSTATE)
cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
@@ -1346,8 +1353,7 @@ static int __cpuinit powernowk8_cpu_init

return 0;

-err_out_unmask:
- set_cpus_allowed_ptr(current, &oldmask);
+err_out_exit_acpi:
powernow_k8_cpu_exit_acpi(data);

err_out:
@@ -1372,12 +1378,20 @@ static int __devexit powernowk8_cpu_exit
return 0;
}

+static void query_values_on_cpu(void *_err)
+{
+ int *err = _err;
+ struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+
+ *err = query_current_values_with_pending_wait(data);
+}
+
static unsigned int powernowk8_get(unsigned int cpu)
{
struct powernow_k8_data *data;
- cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
unsigned int first;
+ int err;

first = cpumask_first(cpu_core_mask(cpu));
data = per_cpu(powernow_data, first);
@@ -1385,15 +1399,8 @@ static unsigned int powernowk8_get(unsig
if (!data)
return -EINVAL;

- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- printk(KERN_ERR PFX
- "limiting to CPU %d failed in powernowk8_get\n", cpu);
- set_cpus_allowed_ptr(current, &oldmask);
- return 0;
- }
-
- if (query_current_values_with_pending_wait(data))
+ smp_call_function_single(first, query_values_on_cpu, &err, true);
+ if (err)
goto out;

if (cpu_family == CPU_HW_PSTATE)
@@ -1404,7 +1411,6 @@ static unsigned int powernowk8_get(unsig


out:
- set_cpus_allowed_ptr(current, &oldmask);
return khz;
}

@@ -1430,7 +1436,9 @@ static int __cpuinit powernowk8_init(voi
unsigned int i, supported_cpus = 0;

for_each_online_cpu(i) {
- if (check_supported_cpu(i))
+ int rc;
+ smp_call_function_single(i, check_supported_cpu, &rc, 1);
+ if (rc == 0)
supported_cpus++;
}


2009-06-11 23:25:31

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH 3/6] cpumask: avoid playing with cpus_allowed in powernow-k8.c

On Thu, Jun 11, 2009 at 10:59:58PM +0930, Rusty Russell wrote:
> From: Rusty Russell <[email protected]>
>
> It's generally a very bad idea to mug some process's cpumask: it could
> legitimately and reasonably be changed by root, which could break us
> (if done before our code) or them (if we restore the wrong value).
>
> I did not replace powernowk8_target; it needs fixing, but it grabs a
> mutex (so no smp_call_function_single here) but Mark points out it can
> be called multiple times per second, so work_on_cpu is too heavy.

This one clashes with some diffs I merged earlier in cpufreq.git.
Can you rebase on top of that please?
(The other two were fine).

Otherwise, they all look ok to me. As long as they survive testing I
see no reason not to push them for .31.

Thanks,

Dave

2009-06-12 11:25:55

by Rusty Russell

[permalink] [raw]
Subject: [PATCH] cpumask: avoid playing with cpus_allowed in powernow-k8.c

cpumask: avoid playing with cpus_allowed in powernow-k8.c

It's generally a very bad idea to mug some process's cpumask: it could
legitimately and reasonably be changed by root, which could break us
(if done before our code) or them (if we restore the wrong value).

I did not replace powernowk8_target; it needs fixing too, but it grabs a
mutex (so smp_call_function_single cannot be used there), and Mark points
out it can be called multiple times per second, so work_on_cpu is too heavy.

Signed-off-by: Rusty Russell <[email protected]>
To: [email protected]
To: [email protected]
Cc: [email protected]

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 20c7b99..1f55547 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -508,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
return 0;
}

-static int check_supported_cpu(unsigned int cpu)
+static void check_supported_cpu(void *_rc)
{
- cpumask_t oldmask;
u32 eax, ebx, ecx, edx;
- unsigned int rc = 0;
-
- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ int *rc = _rc;

- if (smp_processor_id() != cpu) {
- printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
- goto out;
- }
+ *rc = -ENODEV;

if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
- goto out;
+ return;

eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
((eax & CPUID_XFAM) < CPUID_XFAM_10H))
- goto out;
+ return;

if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
printk(KERN_INFO PFX
"Processor cpuid %x not supported\n", eax);
- goto out;
+ return;
}

eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
printk(KERN_INFO PFX
"No frequency change capabilities detected\n");
- goto out;
+ return;
}

cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -550,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu)
!= P_STATE_TRANSITION_CAPABLE) {
printk(KERN_INFO PFX
"Power state transitions not supported\n");
- goto out;
+ return;
}
} else { /* must be a HW Pstate capable processor */
cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
cpu_family = CPU_HW_PSTATE;
else
- goto out;
+ return;
}

- rc = 1;
-
-out:
- set_cpus_allowed_ptr(current, &oldmask);
- return rc;
+ *rc = 0;
}

static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -1247,6 +1236,32 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
return cpufreq_frequency_table_verify(pol, data->powernow_table);
}

+struct init_on_cpu {
+ struct powernow_k8_data *data;
+ int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+ struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+ if (pending_bit_stuck()) {
+ printk(KERN_ERR PFX "failing init, change pending bit set\n");
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+ init_on_cpu->rc = -ENODEV;
+ return;
+ }
+
+ if (cpu_family == CPU_OPTERON)
+ fidvid_msr_init();
+
+ init_on_cpu->rc = 0;
+}
+
/* per CPU init entry point to the driver */
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
@@ -1254,13 +1269,14 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
struct powernow_k8_data *data;
- cpumask_t oldmask;
+ struct init_on_cpu init_on_cpu;
int rc;

if (!cpu_online(pol->cpu))
return -ENODEV;

- if (!check_supported_cpu(pol->cpu))
+ smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+ if (rc)
return -ENODEV;

data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1300,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
pol->cpuinfo.transition_latency = get_transition_latency(data);

/* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
-
- if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
- goto err_out_unmask;
- }
-
- if (pending_bit_stuck()) {
- printk(KERN_ERR PFX "failing init, change pending bit set\n");
- goto err_out_unmask;
- }
-
- if (query_current_values_with_pending_wait(data))
- goto err_out_unmask;
-
- if (cpu_family == CPU_OPTERON)
- fidvid_msr_init();
-
- /* run on any CPU again */
- set_cpus_allowed_ptr(current, &oldmask);
+ init_on_cpu.data = data;
+ smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
+ &init_on_cpu, 1);
+ rc = init_on_cpu.rc;
+ if (rc != 0)
+ goto err_out_exit_acpi;

if (cpu_family == CPU_HW_PSTATE)
cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
@@ -1357,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)

return 0;

-err_out_unmask:
- set_cpus_allowed_ptr(current, &oldmask);
+err_out_exit_acpi:
powernow_k8_cpu_exit_acpi(data);

err_out:
@@ -1383,24 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
return 0;
}

+static void query_values_on_cpu(void *_err)
+{
+ int *err = _err;
+ struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+
+ *err = query_current_values_with_pending_wait(data);
+}
+
static unsigned int powernowk8_get(unsigned int cpu)
{
struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
- cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
+ int err;

if (!data)
return -EINVAL;

- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- printk(KERN_ERR PFX
- "limiting to CPU %d failed in powernowk8_get\n", cpu);
- set_cpus_allowed_ptr(current, &oldmask);
- return 0;
- }
-
- if (query_current_values_with_pending_wait(data))
+ smp_call_function_single(cpu, query_values_on_cpu, &err, true);
+ if (err)
goto out;

if (cpu_family == CPU_HW_PSTATE)
@@ -1411,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu)


out:
- set_cpus_allowed_ptr(current, &oldmask);
return khz;
}

@@ -1437,7 +1437,9 @@ static int __cpuinit powernowk8_init(void)
unsigned int i, supported_cpus = 0;

for_each_online_cpu(i) {
- if (check_supported_cpu(i))
+ int rc;
+ smp_call_function_single(i, check_supported_cpu, &rc, 1);
+ if (rc == 0)
supported_cpus++;
}

2009-06-12 11:26:38

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 3/6] cpumask: avoid playing with cpus_allowed in powernow-k8.c

On Fri, 12 Jun 2009 08:55:17 am Dave Jones wrote:
> This one clashes with some diffs I merged earlier in cpufreq.git.
> Can you rebase on top of that please?
> (The other two were fine).

OK, done. Plus I have another (trivial) powernow-k8 patch (which still
applies with offset).

Thanks!
Rusty.

2009-06-12 11:28:47

by Rusty Russell

[permalink] [raw]
Subject: [PATCH] cpumask: new cpumask operators for arch/x86/kernel/cpu/cpufreq/powernow-k8.c

Remove all old-style cpumask operators, and cpumask_t.

Also: get rid of the unused define_siblings function.

Signed-off-by: Rusty Russell <[email protected]>
---
arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 8 ++++----
arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 11 -----------
2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1060,7 +1060,7 @@ static int transition_frequency_fidvid(s
freqs.old = find_khz_freq_from_fid(data->currfid);
freqs.new = find_khz_freq_from_fid(fid);

- for_each_cpu_mask_nr(i, *(data->available_cores)) {
+ for_each_cpu(i, data->available_cores) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -1068,7 +1068,7 @@ static int transition_frequency_fidvid(s
res = transition_fid_vid(data, fid, vid);
freqs.new = find_khz_freq_from_fid(data->currfid);

- for_each_cpu_mask_nr(i, *(data->available_cores)) {
+ for_each_cpu(i, data->available_cores) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -1093,7 +1093,7 @@ static int transition_frequency_pstate(s
data->currpstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

- for_each_cpu_mask_nr(i, *(data->available_cores)) {
+ for_each_cpu(i, data->available_cores) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -1101,7 +1101,7 @@ static int transition_frequency_pstate(s
res = transition_pstate(data, pstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

- for_each_cpu_mask_nr(i, *(data->available_cores)) {
+ for_each_cpu(i, data->available_cores) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(

static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-
-#ifdef CONFIG_SMP
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
-}
-#else
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
- cpu_set(0, cpu_sharedcore_mask[0]);
-}
-#endif

2009-06-15 15:40:28

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH] cpumask: avoid playing with cpus_allowed in powernow-k8.c

> cpumask: avoid playing with cpus_allowed in powernow-k8.c
>
> It's generally a very bad idea to mug some process's cpumask: it could
> legitimately and reasonably be changed by root, which could break us
> (if done before our code) or them (if we restore the wrong value).
>
> I did not replace powernowk8_target; it needs fixing, but it grabs a
> mutex (so no smp_call_function_single here) but Mark points out it can
> be called multiple times per second, so work_on_cpu is too heavy.
>
> Signed-off-by: Rusty Russell <[email protected]>
> To: [email protected]
> To: [email protected]
> Cc: [email protected]

I ran this over the weekend on a 24-core box without
any issues. Please commit.

Acked-by: Mark Langsdorf <[email protected]>
Tested-by: Mark Langsdorf <[email protected]>

2009-06-15 15:56:17

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH] cpumask: new cpumask operators for arch/x86/kernel/cpu/cpufreq/powernow-k8.c

> Remove all old-style cpumask operators, and cpumask_t.
>
> Also: get rid of the unused define_siblings function.
>
> Signed-off-by: Rusty Russell <[email protected]>

I ran this over the weekend on a 24-core box without
any issues. Please commit.

Acked-by: Mark Langsdorf <[email protected]>
Tested-by: Mark Langsdorf <[email protected]>