Under CONFIG_SMP, dl_bw is per root domain, not per CPU.
When checking or updating dl_bw, iterating over every CPU is overkill;
we only need to visit each root domain once.
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Peng Liu <[email protected]>
---
kernel/sched/deadline.c | 43 ++++++++++++++++++++++++++++++++---------
kernel/sched/sched.h | 7 +++++++
kernel/sched/topology.c | 1 +
3 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index c19c1883d695..5200e185923f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -97,6 +97,17 @@ static inline unsigned long dl_bw_capacity(int i)
return __dl_bw_capacity(i);
}
}
+
+static inline bool dl_bw_visited(int cpu, u32 gen)
+{
+ struct root_domain *rd = cpu_rq(cpu)->rd;
+
+ if (rd->visit_gen == gen)
+ return true;
+
+ rd->visit_gen = gen;
+ return false;
+}
#else
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -112,6 +123,11 @@ static inline unsigned long dl_bw_capacity(int i)
{
return SCHED_CAPACITY_SCALE;
}
+
+static inline bool dl_bw_visited(int cpu, u32 gen)
+{
+ return false;
+}
#endif
static inline
@@ -2514,26 +2530,30 @@ const struct sched_class dl_sched_class
.update_curr = update_curr_dl,
};
+/* Used for dl_bw check and update. */
+static u32 dl_generation;
+
int sched_dl_global_validate(void)
{
u64 runtime = global_rt_runtime();
u64 period = global_rt_period();
u64 new_bw = to_ratio(period, runtime);
struct dl_bw *dl_b;
- int cpu, ret = 0;
unsigned long flags;
+ int cpu, ret = 0;
+ u32 gen = ++dl_generation;
/*
* Here we want to check the bandwidth not being set to some
* value smaller than the currently allocated bandwidth in
* any of the root_domains.
- *
- * FIXME: Cycling on all the CPUs is overdoing, but simpler than
- * cycling on root_domains... Discussion on different/better
- * solutions is welcome!
*/
for_each_possible_cpu(cpu) {
rcu_read_lock_sched();
+
+ if (dl_bw_visited(cpu, gen))
+ goto next;
+
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
@@ -2541,6 +2561,7 @@ int sched_dl_global_validate(void)
ret = -EBUSY;
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+next:
rcu_read_unlock_sched();
if (ret)
@@ -2567,8 +2588,9 @@ void sched_dl_do_global(void)
{
u64 new_bw = -1;
struct dl_bw *dl_b;
- int cpu;
unsigned long flags;
+ int cpu;
+ u32 gen = ++dl_generation;
def_dl_bandwidth.dl_period = global_rt_period();
def_dl_bandwidth.dl_runtime = global_rt_runtime();
@@ -2576,11 +2598,14 @@ void sched_dl_do_global(void)
if (global_rt_runtime() != RUNTIME_INF)
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
- /*
- * FIXME: As above...
- */
for_each_possible_cpu(cpu) {
rcu_read_lock_sched();
+
+ if (dl_bw_visited(cpu, gen)) {
+ rcu_read_unlock_sched();
+ continue;
+ }
+
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 28709f6b0975..53477e8b26b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -798,6 +798,13 @@ struct root_domain {
*/
cpumask_var_t dlo_mask;
atomic_t dlo_count;
+
+ /*
+ * Indicate whether a root_domain's dl_bw has been checked or
+ * updated. It's monotonously increasing, then wrap around.
+ */
+ u32 visit_gen;
+
struct dl_bw dl_bw;
struct cpudl cpudl;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index dd7770226086..90f3e5558fa2 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -516,6 +516,7 @@ static int init_rootdomain(struct root_domain *rd)
init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
#endif
+ rd->visit_gen = 0;
init_dl_bw(&rd->dl_bw);
if (cpudl_init(&rd->cpudl) != 0)
goto free_rto_mask;
--
2.20.1
On Wed, Oct 07, 2020 at 11:12:29PM +0800, Peng Liu wrote:
> +/* Used for dl_bw check and update. */
> +static u32 dl_generation;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 28709f6b0975..53477e8b26b0 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -798,6 +798,13 @@ struct root_domain {
> */
> cpumask_var_t dlo_mask;
> atomic_t dlo_count;
> +
> + /*
> + * Indicate whether a root_domain's dl_bw has been checked or
> + * updated. It's monotonously increasing, then wrap around.
> + */
> + u32 visit_gen;
> +
> struct dl_bw dl_bw;
> struct cpudl cpudl;
>
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index dd7770226086..90f3e5558fa2 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -516,6 +516,7 @@ static int init_rootdomain(struct root_domain *rd)
> init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
> #endif
>
> + rd->visit_gen = 0;
> init_dl_bw(&rd->dl_bw);
> if (cpudl_init(&rd->cpudl) != 0)
> goto free_rto_mask;
I'm fairly sure I made the generation a u64; the above is susceptible to
a false positive due to wrap-around.
Increase the generation to -1, then create a new root domain (whose
visit_gen is initialized to 0). The next generation wraps to 0, which
matches the new domain's visit_gen, so we'll skip the new domain even
though it should be updated.
On Wed, Oct 07, 2020 at 06:55:33PM +0200, Peter Zijlstra wrote:
> On Wed, Oct 07, 2020 at 11:12:29PM +0800, Peng Liu wrote:
> > +/* Used for dl_bw check and update. */
> > +static u32 dl_generation;
>
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index 28709f6b0975..53477e8b26b0 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -798,6 +798,13 @@ struct root_domain {
> > */
> > cpumask_var_t dlo_mask;
> > atomic_t dlo_count;
> > +
> > + /*
> > + * Indicate whether a root_domain's dl_bw has been checked or
> > + * updated. It's monotonously increasing, then wrap around.
> > + */
> > + u32 visit_gen;
> > +
> > struct dl_bw dl_bw;
> > struct cpudl cpudl;
> >
> > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> > index dd7770226086..90f3e5558fa2 100644
> > --- a/kernel/sched/topology.c
> > +++ b/kernel/sched/topology.c
> > @@ -516,6 +516,7 @@ static int init_rootdomain(struct root_domain *rd)
> > init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
> > #endif
> >
> > + rd->visit_gen = 0;
> > init_dl_bw(&rd->dl_bw);
> > if (cpudl_init(&rd->cpudl) != 0)
> > goto free_rto_mask;
>
> I'm fairly sure I made the generation a u64, the above is susceptible to
> a false positive due to wrap-around.
>
> Increase the generation to -1, create a new root domain, then the next
> generation is 0 and we'll skip the new domain, even though it should be
> updated.
Ah... at first, I thought that u32 was "big enough": given that no one
changes the settings frequently, wrap-around shouldn't be a concern.
So... OK, I will change it back to u64. We've come full circle! :)
Thanks for your time!