Replace a bunch of cpumask_any*() instances with
cpumask_any*_distribute(). By injecting this little bit of randomness
into the CPU selection we reduce the chance that two competing balance
operations working off the same lowest_mask pick the same CPU.
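For illustration only, a small userspace model of the rotation idea; the
names here (mask_any_distribute(), the prev cursor, NR_CPUS) are made up
for the sketch and are not the kernel implementation:

	#include <stdio.h>

	#define NR_CPUS 8

	/* Next set bit strictly after 'prev', or NR_CPUS if there is none. */
	static int mask_next(int prev, unsigned int mask)
	{
		for (int cpu = prev + 1; cpu < NR_CPUS; cpu++)
			if (mask & (1u << cpu))
				return cpu;
		return NR_CPUS;
	}

	/* Pick "any" CPU from 'mask', rotating from *prev so that repeated
	 * callers spread out instead of all piling onto the first set bit. */
	static int mask_any_distribute(unsigned int mask, int *prev)
	{
		int cpu = mask_next(*prev, mask);

		if (cpu >= NR_CPUS)
			cpu = mask_next(-1, mask);	/* wrap to the first set bit */
		if (cpu < NR_CPUS)
			*prev = cpu;
		return cpu;
	}

	int main(void)
	{
		unsigned int lowest_mask = 0x36;	/* CPUs 1, 2, 4, 5 set */
		int prev = 0;	/* in the kernel this is a per-CPU variable */

		for (int i = 0; i < 6; i++)
			printf("pick %d\n", mask_any_distribute(lowest_mask, &prev));
		/* prints 1 2 4 5 1 2 -- rotates instead of always returning 1 */
		return 0;
	}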
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
include/linux/cpumask.h | 6 ++++++
kernel/sched/cpupri.c | 4 ++--
kernel/sched/deadline.c | 2 +-
kernel/sched/rt.c | 6 +++---
lib/cpumask.c | 18 ++++++++++++++++++
5 files changed, 30 insertions(+), 6 deletions(-)
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distri
return cpumask_next_and(-1, src1p, src2p);
}
+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+ return cpumask_first(srcp);
+}
+
#define for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask
unsigned int cpumask_local_spread(unsigned int i, int node);
int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);
/**
* for_each_cpu - iterate over every cpu in a mask
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2001,7 +2001,7 @@ static int find_later_rq(struct task_str
if (this_cpu != -1)
return this_cpu;
- cpu = cpumask_any(later_mask);
+ cpu = cpumask_any_distribute(later_mask);
if (cpu < nr_cpu_ids)
return cpu;
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1752,8 +1752,8 @@ static int find_lowest_rq(struct task_st
return this_cpu;
}
- best_cpu = cpumask_first_and(lowest_mask,
- sched_domain_span(sd));
+ best_cpu = cpumask_any_and_distribute(lowest_mask,
+ sched_domain_span(sd));
if (best_cpu < nr_cpu_ids) {
rcu_read_unlock();
return best_cpu;
@@ -1770,7 +1770,7 @@ static int find_lowest_rq(struct task_st
if (this_cpu != -1)
return this_cpu;
- cpu = cpumask_any(lowest_mask);
+ cpu = cpumask_any_distribute(lowest_mask);
if (cpu < nr_cpu_ids)
return cpu;
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const str
return next;
}
EXPORT_SYMBOL(cpumask_any_and_distribute);
+
+int cpumask_any_distribute(const struct cpumask *srcp)
+{
+ int next, prev;
+
+ /* NOTE: our first selection will skip 0. */
+ prev = __this_cpu_read(distribute_cpu_mask_prev);
+
+ next = cpumask_next(prev, srcp);
+ if (next >= nr_cpu_ids)
+ next = cpumask_first(srcp);
+
+ if (next < nr_cpu_ids)
+ __this_cpu_write(distribute_cpu_mask_prev, next);
+
+ return next;
+}
+EXPORT_SYMBOL(cpumask_any_distribute);
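The per-CPU distribute_cpu_mask_prev cursor starts out at 0, which is what
the NOTE above refers to. A hypothetical trace of the first few calls on one
CPU, with an assumed srcp = { 0, 2, 5 }, purely for illustration:

	/*
	 * prev = 0
	 * cpumask_next(0, srcp)  -> 2	(first set bit strictly after 0)
	 * cursor becomes 2, return 2
	 *
	 * second call: cpumask_next(2, srcp) -> 5, cursor = 5, return 5
	 * third call:  cpumask_next(5, srcp) -> nr_cpu_ids, so wrap via
	 *              cpumask_first(srcp)   -> 0, cursor = 0, return 0
	 */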
Hi,
On 05/10/20 16:57, Peter Zijlstra wrote:
> Replace a bunch of cpumask_any*() instances with
> cpumask_any*_distribute(). By injecting this little bit of randomness
> into the CPU selection we reduce the chance that two competing balance
> operations working off the same lowest_mask pick the same CPU.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> include/linux/cpumask.h | 6 ++++++
> kernel/sched/cpupri.c | 4 ++--
> kernel/sched/deadline.c | 2 +-
> kernel/sched/rt.c | 6 +++---
> lib/cpumask.c | 18 ++++++++++++++++++
> 5 files changed, 30 insertions(+), 6 deletions(-)
>
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -199,6 +199,11 @@ static inline int cpumask_any_and_distri
> return cpumask_next_and(-1, src1p, src2p);
> }
>
> +static inline int cpumask_any_distribute(const struct cpumask *srcp)
> +{
> + return cpumask_first(srcp);
> +}
> +
> #define for_each_cpu(cpu, mask) \
> for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> #define for_each_cpu_not(cpu, mask) \
> @@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask
> unsigned int cpumask_local_spread(unsigned int i, int node);
> int cpumask_any_and_distribute(const struct cpumask *src1p,
> const struct cpumask *src2p);
> +int cpumask_any_distribute(const struct cpumask *srcp);
>
> /**
> * for_each_cpu - iterate over every cpu in a mask
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2001,7 +2001,7 @@ static int find_later_rq(struct task_str
> if (this_cpu != -1)
> return this_cpu;
>
> - cpu = cpumask_any(later_mask);
> + cpu = cpumask_any_distribute(later_mask);
> if (cpu < nr_cpu_ids)
> return cpu;
I think we can use cpumask_any_and_distribute() with later_mask for
deadline as well, inside the for_each_domain() loop, as you do for rt below.
Best,
Juri
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1752,8 +1752,8 @@ static int find_lowest_rq(struct task_st
> return this_cpu;
> }
>
> - best_cpu = cpumask_first_and(lowest_mask,
> - sched_domain_span(sd));
> + best_cpu = cpumask_any_and_distribute(lowest_mask,
> + sched_domain_span(sd));
> if (best_cpu < nr_cpu_ids) {
> rcu_read_unlock();
> return best_cpu;
> @@ -1770,7 +1770,7 @@ static int find_lowest_rq(struct task_st
> if (this_cpu != -1)
> return this_cpu;
>
> - cpu = cpumask_any(lowest_mask);
> + cpu = cpumask_any_distribute(lowest_mask);
> if (cpu < nr_cpu_ids)
> return cpu;
On Tue, Oct 06, 2020 at 04:09:26PM +0200, Juri Lelli wrote:
> > --- a/kernel/sched/deadline.c
> > +++ b/kernel/sched/deadline.c
> > @@ -2001,7 +2001,7 @@ static int find_later_rq(struct task_str
> > if (this_cpu != -1)
> > return this_cpu;
> >
> > - cpu = cpumask_any(later_mask);
> > + cpu = cpumask_any_distribute(later_mask);
> > if (cpu < nr_cpu_ids)
> > return cpu;
>
> Think we can use cpumask_any_and_distribute() with later_mask for
> deadline as well inside the for_each_domain loop as you do for rt below.
Ah, indeed... I missed it because it is cpumask_first_and() there, instead
of cpumask_any() as with rt.
I folded the below.
---
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1981,8 +1981,8 @@ static int find_later_rq(struct task_str
return this_cpu;
}
- best_cpu = cpumask_first_and(later_mask,
- sched_domain_span(sd));
+ best_cpu = cpumask_any_and_distribute(later_mask,
+ sched_domain_span(sd));
/*
* Last chance: if a CPU being in both later_mask
* and current sd span is valid, that becomes our
On 10/05/20 16:57, Peter Zijlstra wrote:
> Replace a bunch of cpumask_any*() instances with
> cpumask_any*_distribute(). By injecting this little bit of randomness
> into the CPU selection we reduce the chance that two competing balance
> operations working off the same lowest_mask pick the same CPU.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> include/linux/cpumask.h | 6 ++++++
> kernel/sched/cpupri.c | 4 ++--
> kernel/sched/deadline.c | 2 +-
> kernel/sched/rt.c | 6 +++---
> lib/cpumask.c | 18 ++++++++++++++++++
> 5 files changed, 30 insertions(+), 6 deletions(-)
>
[...]
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1752,8 +1752,8 @@ static int find_lowest_rq(struct task_st
> return this_cpu;
> }
>
> - best_cpu = cpumask_first_and(lowest_mask,
> - sched_domain_span(sd));
> + best_cpu = cpumask_any_and_distribute(lowest_mask,
> + sched_domain_span(sd));
I guess I should have done this 6 months ago and just got done with it :)
[email protected]
> if (best_cpu < nr_cpu_ids) {
> rcu_read_unlock();
> return best_cpu;
> @@ -1770,7 +1770,7 @@ static int find_lowest_rq(struct task_st
> if (this_cpu != -1)
> return this_cpu;
>
> - cpu = cpumask_any(lowest_mask);
> + cpu = cpumask_any_distribute(lowest_mask);
> if (cpu < nr_cpu_ids)
> return cpu;
>
> --- a/lib/cpumask.c
> +++ b/lib/cpumask.c
> @@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const str
> return next;
> }
> EXPORT_SYMBOL(cpumask_any_and_distribute);
> +
> +int cpumask_any_distribute(const struct cpumask *srcp)
> +{
> + int next, prev;
> +
> + /* NOTE: our first selection will skip 0. */
> + prev = __this_cpu_read(distribute_cpu_mask_prev);
We had a discussion back then that the __this_cpu*() variants assume
preemption is disabled and that it's safer to use the this_cpu*() variants
instead. Does that still hold true here?
Thanks
--
Qais Yousef
> +
> + next = cpumask_next(prev, srcp);
> + if (next >= nr_cpu_ids)
> + next = cpumask_first(srcp);
> +
> + if (next < nr_cpu_ids)
> + __this_cpu_write(distribute_cpu_mask_prev, next);
> +
> + return next;
> +}
> +EXPORT_SYMBOL(cpumask_any_distribute);
>
>
On Tue, Oct 06, 2020 at 04:55:27PM +0100, Qais Yousef wrote:
> > +int cpumask_any_distribute(const struct cpumask *srcp)
> > +{
> > + int next, prev;
> > +
> > + /* NOTE: our first selection will skip 0. */
> > + prev = __this_cpu_read(distribute_cpu_mask_prev);
>
> We had a discussion then that __this_cpu*() variant assumes preemption being
> disabled and it's safer to use this_cpu*() variant instead. Still holds true
> here?
I think we ended up not caring. We wanted a 'random' value; we get a
'random' value from a 'random' CPU, so it still works ;-)
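For reference, the this_cpu_*() form Qais mentions would only swap the
accessors; a sketch (untested, purely illustrative -- the logic is unchanged
from the patch):

	int cpumask_any_distribute(const struct cpumask *srcp)
	{
		int next, prev;

		/*
		 * this_cpu_read()/this_cpu_write() are each preemption-safe
		 * on their own, but the read..write pair is still not atomic;
		 * the point above is that a stale or mixed-up cursor is
		 * harmless here, we only want a rotating hint.
		 */
		prev = this_cpu_read(distribute_cpu_mask_prev);

		next = cpumask_next(prev, srcp);
		if (next >= nr_cpu_ids)
			next = cpumask_first(srcp);

		if (next < nr_cpu_ids)
			this_cpu_write(distribute_cpu_mask_prev, next);

		return next;
	}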