2009-11-03 04:23:12

by Rusty Russell

Subject: [PATCH 1/14] cpumask: simplify sched_rt.c


find_lowest_rq() wants to call pick_optimal_cpu() on the intersection
of sched_domain_span(sd) and lowest_mask. Rather than doing a cpus_and
into a temporary, we can open-code it.

This actually makes the code slightly clearer, IMHO.
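
For readers less familiar with the cpumask API, the idea is to replace
"AND the two masks into a temporary, then scan the temporary" with a
single pass over both masks.  A rough standalone sketch in plain
userspace C (illustrative names and word-at-a-time layout of my own;
the kernel's real helper is cpumask_first_and()):

#include <stddef.h>

#define BITS_PER_WORD	(8 * sizeof(unsigned long))

/*
 * First bit set in both bitmaps, or nbits if the intersection is empty.
 * Scans word by word, so no temporary "a AND b" bitmap is built --
 * roughly the shape of cpumask_first_and(), with nbits playing the
 * role of nr_cpu_ids.
 */
static size_t bitmap_first_and(const unsigned long *a,
			       const unsigned long *b, size_t nbits)
{
	size_t base, bit;
	unsigned long both;

	for (base = 0; base < nbits; base += BITS_PER_WORD) {
		both = a[base / BITS_PER_WORD] & b[base / BITS_PER_WORD];
		if (!both)
			continue;
		bit = base;
		while (!(both & 1UL)) {
			both >>= 1;
			bit++;
		}
		return bit < nbits ? bit : nbits;
	}
	return nbits;
}

Calling bitmap_first_and(&x, &y, BITS_PER_WORD) on two single-word masks
returns the lowest bit set in both, or BITS_PER_WORD when they do not
intersect -- the same "no CPU found" convention the patch relies on with
nr_cpu_ids.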

Signed-off-by: Rusty Russell <[email protected]>
To: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
kernel/sched_rt.c | 59 +++++++++++++++++++++---------------------------------
1 file changed, 23 insertions(+), 36 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1115,29 +1115,12 @@ static struct task_struct *pick_next_hig

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

-static inline int pick_optimal_cpu(int this_cpu,
- const struct cpumask *mask)
-{
- int first;
-
- /* "this_cpu" is cheaper to preempt than a remote processor */
- if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
- return this_cpu;
-
- first = cpumask_first(mask);
- if (first < nr_cpu_ids)
- return first;
-
- return -1;
-}
-
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
- cpumask_var_t domain_mask;

if (task->rt.nr_cpus_allowed == 1)
return -1; /* No other targets possible */
@@ -1167,28 +1150,26 @@ static int find_lowest_rq(struct task_st
* Otherwise, we consult the sched_domains span maps to figure
* out which cpu is logically closest to our hot cache data.
*/
- if (this_cpu == cpu)
- this_cpu = -1; /* Skip this_cpu opt if the same */
+ if (!cpumask_test_cpu(this_cpu, lowest_mask))
+ this_cpu = -1; /* Skip this_cpu opt if not among lowest */

- if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_AFFINE) {
- int best_cpu;
+ for_each_domain(cpu, sd) {
+ if (sd->flags & SD_WAKE_AFFINE) {
+ int best_cpu;

- cpumask_and(domain_mask,
- sched_domain_span(sd),
- lowest_mask);
+ /*
+ * "this_cpu" is cheaper to preempt than a
+ * remote processor.
+ */
+ if (this_cpu != -1 &&
+ cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+ return this_cpu;

- best_cpu = pick_optimal_cpu(this_cpu,
- domain_mask);
-
- if (best_cpu != -1) {
- free_cpumask_var(domain_mask);
- return best_cpu;
- }
- }
+ best_cpu = cpumask_first_and(lowest_mask,
+ sched_domain_span(sd));
+ if (best_cpu < nr_cpu_ids)
+ return best_cpu;
}
- free_cpumask_var(domain_mask);
}

/*
@@ -1196,7 +1177,13 @@ static int find_lowest_rq(struct task_st
* just give the caller *something* to work with from the compatible
* locations.
*/
- return pick_optimal_cpu(this_cpu, lowest_mask);
+ if (this_cpu != -1)
+ return this_cpu;
+
+ cpu = cpumask_any(lowest_mask);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ return -1;
}

/* Will lock the rq it finds */


2009-11-03 16:41:49

by Steven Rostedt

Subject: Re: [PATCH 1/14] cpumask: simplify sched_rt.c

Greg,

I believe this was mostly your code. Can you review this change?

Thanks,

-- Steve


On Tue, 2009-11-03 at 14:53 +1030, Rusty Russell wrote:
> find_lowest_rq() wants to call pick_optimal_cpu() on the intersection
> of sched_domain_span(sd) and lowest_mask. Rather than doing a cpus_and
> into a temporary, we can open-code it.
>
> This actually makes the code slightly clearer, IMHO.
>
> Signed-off-by: Rusty Russell <[email protected]>
> To: Steven Rostedt <[email protected]>
> Cc: Ingo Molnar <[email protected]>

2009-11-03 21:22:20

by Gregory Haskins

Subject: Re: [PATCH 1/14] cpumask: simplify sched_rt.c

>>> On 11/3/2009 at 11:41 AM, in message
<[email protected]>, Steven Rostedt
<[email protected]> wrote:
> Greg,
>
> I believe this was mostly your code. Can you review this change?
>

Hi Steve,
FYI: Looking now. The diff hunk layout is a little confusing, so I need to think about it a little longer.

Kind Regards,
-Greg

2009-11-03 21:40:55

by Gregory Haskins

Subject: Re: [PATCH 1/14] cpumask: simplify sched_rt.c

>>> On 11/3/2009 at 11:41 AM, in message
<[email protected]>, Steven Rostedt
<[email protected]> wrote:
> Greg,
>
> I believe this was mostly your code. Can you review this change?
>
> Thanks,
>
> -- Steve
>
>
> On Tue, 2009-11-03 at 14:53 +1030, Rusty Russell wrote:
>> find_lowest_rq() wants to call pick_optimal_cpu() on the intersection
>> of sched_domain_span(sd) and lowest_mask. Rather than doing a cpus_and
>> into a temporary, we can open-code it.
>>
>> This actually makes the code slightly clearer, IMHO.
>>
>> Signed-off-by: Rusty Russell <[email protected]>
>> To: Steven Rostedt <[email protected]>
>> Cc: Ingo Molnar <[email protected]>

I am not sure that I agree that it's "clearer", but I do like the fact that the potentially nasty dynamic mask allocation is removed from the fast path.
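
Condensed from the original patch above (kernel context, loop details
elided, so a sketch of the shape rather than the literal code), the
pattern being removed versus the one replacing it:

	/* Old shape: temporary mask, allocated and freed on every call. */
	cpumask_var_t domain_mask;

	if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
		cpumask_and(domain_mask, sched_domain_span(sd), lowest_mask);
		best_cpu = pick_optimal_cpu(this_cpu, domain_mask);
		free_cpumask_var(domain_mask);
	}

	/* New shape: no temporary, scan both masks in one pass. */
	best_cpu = cpumask_first_and(lowest_mask, sched_domain_span(sd));

With off-stack cpumasks the GFP_ATOMIC allocation is a real kmalloc that
can fail under memory pressure, which is exactly the fast-path cost the
new code avoids.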

I checked the logic, and it does appear to be functionally equivalent to the original.

Acked-by: Gregory Haskins <[email protected]>


2009-11-04 15:26:42

by Rusty Russell

Subject: [tip:sched/core] cpumask: Simplify sched_rt.c

Commit-ID: e2c880630438f80b474378d5487b511b07665051
Gitweb: http://git.kernel.org/tip/e2c880630438f80b474378d5487b511b07665051
Author: Rusty Russell <[email protected]>
AuthorDate: Tue, 3 Nov 2009 14:53:15 +1030
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 4 Nov 2009 13:16:38 +0100

cpumask: Simplify sched_rt.c

find_lowest_rq() wants to call pick_optimal_cpu() on the
intersection of sched_domain_span(sd) and lowest_mask. Rather
than doing a cpus_and into a temporary, we can open-code it.

This actually makes the code slightly clearer, IMHO.

Signed-off-by: Rusty Russell <[email protected]>
Acked-by: Gregory Haskins <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched_rt.c | 61 ++++++++++++++++++++--------------------------------
1 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a4d790c..5c5fef3 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

-static inline int pick_optimal_cpu(int this_cpu,
- const struct cpumask *mask)
-{
- int first;
-
- /* "this_cpu" is cheaper to preempt than a remote processor */
- if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
- return this_cpu;
-
- first = cpumask_first(mask);
- if (first < nr_cpu_ids)
- return first;
-
- return -1;
-}
-
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
- cpumask_var_t domain_mask;

if (task->rt.nr_cpus_allowed == 1)
return -1; /* No other targets possible */
@@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task)
* Otherwise, we consult the sched_domains span maps to figure
* out which cpu is logically closest to our hot cache data.
*/
- if (this_cpu == cpu)
- this_cpu = -1; /* Skip this_cpu opt if the same */
-
- if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_AFFINE) {
- int best_cpu;
+ if (!cpumask_test_cpu(this_cpu, lowest_mask))
+ this_cpu = -1; /* Skip this_cpu opt if not among lowest */

- cpumask_and(domain_mask,
- sched_domain_span(sd),
- lowest_mask);
+ for_each_domain(cpu, sd) {
+ if (sd->flags & SD_WAKE_AFFINE) {
+ int best_cpu;

- best_cpu = pick_optimal_cpu(this_cpu,
- domain_mask);
-
- if (best_cpu != -1) {
- free_cpumask_var(domain_mask);
- return best_cpu;
- }
- }
+ /*
+ * "this_cpu" is cheaper to preempt than a
+ * remote processor.
+ */
+ if (this_cpu != -1 &&
+ cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+ return this_cpu;
+
+ best_cpu = cpumask_first_and(lowest_mask,
+ sched_domain_span(sd));
+ if (best_cpu < nr_cpu_ids)
+ return best_cpu;
}
- free_cpumask_var(domain_mask);
}

/*
@@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task)
* just give the caller *something* to work with from the compatible
* locations.
*/
- return pick_optimal_cpu(this_cpu, lowest_mask);
+ if (this_cpu != -1)
+ return this_cpu;
+
+ cpu = cpumask_any(lowest_mask);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ return -1;
}

/* Will lock the rq it finds */