While testing cpusets on asymmetric CPU capacity systems I realized that
the admission control, which kicks in when the cpumask of a cpuset is
about to be changed, is not capacity-aware. [PATCH 2/3] addresses this
issue.
Overview:
[PATCH 1/3] - Not part of the DL-related changes but included here for
convenience: inline helper function around
static_branch_unlikely(&sched_asym_cpucapacity)
[PATCH 2/3] - Make dl_cpuset_cpumask_can_shrink() capacity-aware.
[PATCH 3/3] - Save a multiplication in dl_task_fits_capacity() by using
already maintained dl_density.
Dietmar Eggemann (3):
sched: Introduce sched_asym_cpucap_active()
sched/deadline: Make dl_cpuset_cpumask_can_shrink() capacity-aware
sched/deadline: Use sched_dl_entity's dl_density in
dl_task_fits_capacity()
kernel/sched/cpudeadline.c | 2 +-
kernel/sched/deadline.c | 28 +++++++++++++---------------
kernel/sched/fair.c | 8 ++++----
kernel/sched/rt.c | 4 ++--
kernel/sched/sched.h | 13 +++++++++----
5 files changed, 29 insertions(+), 26 deletions(-)
--
2.25.1
Create an inline helper for conditional code that should only be executed
on asymmetric CPU capacity systems. This makes these conditions (currently
~10, plus any future ones) a lot more readable.
Signed-off-by: Dietmar Eggemann <[email protected]>
---
kernel/sched/cpudeadline.c | 2 +-
kernel/sched/deadline.c | 4 ++--
kernel/sched/fair.c | 8 ++++----
kernel/sched/rt.c | 4 ++--
kernel/sched/sched.h | 5 +++++
5 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 02d970a879ed..57c92d751bcd 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -123,7 +123,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
unsigned long cap, max_cap = 0;
int cpu, max_cpu = -1;
- if (!static_branch_unlikely(&sched_asym_cpucapacity))
+ if (!sched_asym_cpucap_active())
return 1;
/* Ensure the capacity of the CPUs fits the task. */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5867e186c39a..3f9d90b8a8b6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -144,7 +144,7 @@ static inline unsigned long __dl_bw_capacity(int i)
*/
static inline unsigned long dl_bw_capacity(int i)
{
- if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+ if (!sched_asym_cpucap_active() &&
capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
} else {
@@ -1846,7 +1846,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
* Take the capacity of the CPU into account to
* ensure it fits the requirement of the task.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity))
+ if (sched_asym_cpucap_active())
select_rq |= !dl_task_fits_capacity(p, cpu);
if (select_rq) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f80ae86bb404..f32e0866b9c9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4211,7 +4211,7 @@ static inline int task_fits_capacity(struct task_struct *p,
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
{
- if (!static_branch_unlikely(&sched_asym_cpucapacity))
+ if (!sched_asym_cpucap_active())
return;
if (!p || p->nr_cpus_allowed == 1) {
@@ -6452,7 +6452,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
{
- if (static_branch_unlikely(&sched_asym_cpucapacity))
+ if (sched_asym_cpucap_active())
return fits_capacity(task_util, capacity_of(cpu));
return true;
@@ -6472,7 +6472,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
* On asymmetric system, update task utilization because we will check
* that the task fits with cpu's capacity.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ if (sched_asym_cpucap_active()) {
sync_entity_load_avg(&p->se);
task_util = uclamp_task_util(p);
}
@@ -6526,7 +6526,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ if (sched_asym_cpucap_active()) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed9664840..f0c297c1bd78 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -509,7 +509,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
unsigned int cpu_cap;
/* Only heterogeneous systems can benefit from this check */
- if (!static_branch_unlikely(&sched_asym_cpucapacity))
+ if (!sched_asym_cpucap_active())
return true;
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
@@ -1894,7 +1894,7 @@ static int find_lowest_rq(struct task_struct *task)
* If we're on asym system ensure we consider the different capacities
* of the CPUs when searching for the lowest_mask.
*/
- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+ if (sched_asym_cpucap_active()) {
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
task, lowest_mask,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 02c970501295..79f65a6799c6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1759,6 +1759,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
extern struct static_key_false sched_asym_cpucapacity;
+static __always_inline bool sched_asym_cpucap_active(void)
+{
+ return static_branch_unlikely(&sched_asym_cpucapacity);
+}
+
struct sched_group_capacity {
atomic_t ref;
/*
--
2.25.1
dl_cpuset_cpumask_can_shrink() is used to validate whether there is
still enough CPU capacity for DL tasks in the reduced cpuset.
Currently it still operates on `# remaining CPUs in the cpuset` (1).
Change this to use the already capacity-aware DL admission control
__dl_overflow() for the `cpumask can shrink` test.
dl_b->bw = sched_rt_runtime << BW_SHIFT / sched_rt_period
dl_b->bw * (1) >= currently allocated bandwidth in root_domain (rd)
Replace (1) w/ `\Sum CPU capacity of the remaining CPUs in the cpuset >> SCHED_CAPACITY_SHIFT`
Adapt __dl_bw_capacity() to take a cpumask instead of a CPU number
argument so that either `rd->span` or the `cpumask of the reduced cpuset`
can be passed to it.
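For illustration, a throwaway userspace sketch of the two admission tests
(the task set, the 0.95 global limit and the 2-little-CPU trial cpuset below
are made-up numbers, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

#define BW_SHIFT                20
#define BW_UNIT                 (1 << BW_SHIFT)
#define SCHED_CAPACITY_SHIFT    10

int main(void)
{
        /* Hypothetical cpuset: 4 DL tasks already admitted, each with bw 0.4 */
        uint64_t total_bw = 4 * ((uint64_t)4 * BW_UNIT / 10);
        /* Default global limit: sched_rt_runtime/sched_rt_period = 0.95 */
        uint64_t dl_bw = (uint64_t)95 * BW_UNIT / 100;
        /* Trial cpuset shrunk to 2 little CPUs of capacity 512 each */
        unsigned int trial_cpus = 2;
        uint64_t cap = 2 * 512;

        /* Old test: dl_b->bw * `# remaining CPUs`, capacity is ignored */
        int old_ok = dl_bw * trial_cpus >= total_bw;
        /* New test, __dl_overflow() style: cap_scale(dl_b->bw, cap) */
        int new_ok = ((dl_bw * cap) >> SCHED_CAPACITY_SHIFT) >= total_bw;

        printf("old (CPU count) check: %s\n", old_ok ? "admit" : "reject");
        printf("new (capacity)  check: %s\n", new_ok ? "admit" : "reject");

        return 0;
}

Counting CPUs admits the shrink (2 * 0.95 >= 1.6) whereas the capacity-aware
test rejects it (2 * 512/1024 * 0.95 = 0.95 < 1.6).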
Signed-off-by: Dietmar Eggemann <[email protected]>
---
kernel/sched/deadline.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3f9d90b8a8b6..34de6060dea6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -124,15 +124,12 @@ static inline int dl_bw_cpus(int i)
return cpus;
}
-static inline unsigned long __dl_bw_capacity(int i)
+static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
{
- struct root_domain *rd = cpu_rq(i)->rd;
unsigned long cap = 0;
+ int i;
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
- "sched RCU must be held");
-
- for_each_cpu_and(i, rd->span, cpu_active_mask)
+ for_each_cpu_and(i, mask, cpu_active_mask)
cap += capacity_orig_of(i);
return cap;
@@ -148,7 +145,10 @@ static inline unsigned long dl_bw_capacity(int i)
capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
} else {
- return __dl_bw_capacity(i);
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+
+ return __dl_bw_capacity(cpu_rq(i)->rd->span);
}
}
@@ -3004,17 +3004,15 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
- int ret = 1, trial_cpus;
+ unsigned long flags, cap;
struct dl_bw *cur_dl_b;
- unsigned long flags;
+ int ret = 1;
rcu_read_lock_sched();
cur_dl_b = dl_bw_of(cpumask_any(cur));
- trial_cpus = cpumask_weight(trial);
-
+ cap = __dl_bw_capacity(trial);
raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
- if (cur_dl_b->bw != -1 &&
- cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
+ if (__dl_overflow(cur_dl_b, cap, 0, 0))
ret = 0;
raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
rcu_read_unlock_sched();
--
2.25.1
Save a multiplication in dl_task_fits_capacity() by using the already
maintained per-sched_dl_entity (i.e. per-task) ratio `dl_runtime/dl_deadline`
(dl_density).
cap_scale(dl_deadline, cap) >= dl_runtime
dl_deadline * cap >> SCHED_CAPACITY_SHIFT >= dl_runtime
cap >= dl_runtime << SCHED_CAPACITY_SHIFT / dl_deadline
with BW_SHIFT = 2 x SCHED_CAPACITY_SHIFT
cap >= (dl_runtime << BW_SHIFT / dl_deadline) >> SCHED_CAPACITY_SHIFT
cap >= dl_density >> SCHED_CAPACITY_SHIFT
__sched_setscheduler()->__checkparam_dl() ensures that the two corner cases
(if conditions) of to_ratio(deadline, runtime), `runtime == RUNTIME_INF (-1)`
and `period == 0`, are not met when dl_density is set in
__sched_setscheduler()->__setscheduler_params()->__setparam_dl().
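As a quick sanity check, a throwaway userspace sketch (made-up runtime/deadline
values; BW_SHIFT and SCHED_CAPACITY_SHIFT at their current defaults, i.e.
BW_SHIFT == 2 * SCHED_CAPACITY_SHIFT) comparing the old and the new form of
the fitness test:

#include <stdio.h>
#include <stdint.h>

#define BW_SHIFT                20
#define SCHED_CAPACITY_SHIFT    10

/* Mirrors the kernel's cap_scale(): scale @val by @cap / SCHED_CAPACITY_SCALE */
static uint64_t cap_scale(uint64_t val, unsigned long cap)
{
        return (val * cap) >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
        /* Hypothetical DL task: 2.5ms runtime per 10ms deadline */
        uint64_t dl_runtime  =  2500000;        /* ns */
        uint64_t dl_deadline = 10000000;        /* ns */
        /* dl_density as __setparam_dl()/to_ratio() would set it up */
        uint64_t dl_density  = (dl_runtime << BW_SHIFT) / dl_deadline;
        unsigned long cap;

        for (cap = 128; cap <= 1024; cap <<= 1) {
                int old_fits = cap_scale(dl_deadline, cap) >= dl_runtime;
                int new_fits = cap >= dl_density >> SCHED_CAPACITY_SHIFT;

                printf("cap=%4lu old=%d new=%d\n", cap, old_fits, new_fits);
        }

        return 0;
}

Both forms flip from 0 to 1 at cap == 256 here; note that the equivalence
relies on BW_SHIFT being exactly twice SCHED_CAPACITY_SHIFT.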
Signed-off-by: Dietmar Eggemann <[email protected]>
---
kernel/sched/sched.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 79f65a6799c6..555f58d6c3a6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -324,15 +324,15 @@ struct dl_bw {
* Verify the fitness of task @p to run on @cpu taking into account the
* CPU original capacity and the runtime/deadline ratio of the task.
*
- * The function will return true if the CPU original capacity of the
- * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
- * task and false otherwise.
+ * The function will return true if the original capacity of @cpu is
+ * greater than or equal to task's deadline density right shifted by
+ * SCHED_CAPACITY_SHIFT and false otherwise.
*/
static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned long cap = arch_scale_cpu_capacity(cpu);
- return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
+ return cap >= p->dl.dl_density >> SCHED_CAPACITY_SHIFT;
}
extern void init_dl_bw(struct dl_bw *dl_b);
--
2.25.1
On Wed, 29 Jun 2022 at 14:21, Dietmar Eggemann <[email protected]> wrote:
>
> Save a multiplication in dl_task_fits_capacity() by using the already
> maintained per-sched_dl_entity (i.e. per-task) ratio `dl_runtime/dl_deadline`
> (dl_density).
>
> cap_scale(dl_deadline, cap) >= dl_runtime
>
> dl_deadline * cap >> SCHED_CAPACITY_SHIFT >= dl_runtime
>
> cap >= dl_runtime << SCHED_CAPACITY_SHIFT / dl_deadline
>
> with BW_SHIFT = 2 x SCHED_CAPACITY_SHIFT
>
> cap >= (dl_runtime << BW_SHIFT / dl_deadline) >> SCHED_CAPACITY_SHIFT
>
> cap >= dl_density >> SCHED_CAPACITY_SHIFT
>
> __sched_setscheduler()->__checkparam_dl() ensures that the two corner cases
> (if conditions) of to_ratio(deadline, runtime), `runtime == RUNTIME_INF (-1)`
> and `period == 0`, are not met when dl_density is set in
> __sched_setscheduler()->__setscheduler_params()->__setparam_dl().
>
> Signed-off-by: Dietmar Eggemann <[email protected]>
> ---
> kernel/sched/sched.h | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 79f65a6799c6..555f58d6c3a6 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -324,15 +324,15 @@ struct dl_bw {
> * Verify the fitness of task @p to run on @cpu taking into account the
> * CPU original capacity and the runtime/deadline ratio of the task.
> *
> - * The function will return true if the CPU original capacity of the
> - * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
> - * task and false otherwise.
> + * The function will return true if the original capacity of @cpu is
> + * greater than or equal to task's deadline density right shifted by
> + * SCHED_CAPACITY_SHIFT and false otherwise.
> */
> static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
> {
> unsigned long cap = arch_scale_cpu_capacity(cpu);
>
> - return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
> + return cap >= p->dl.dl_density >> SCHED_CAPACITY_SHIFT;
There is no direct relation between BW_SHIFT and SCHED_CAPACITY_SHIFT
and we can change one without modifying the other.
Should you use (BW_SHIFT - SCHED_CAPACITY_SHIFT) instead of SCHED_CAPACITY_SHIFT?
> }
>
> extern void init_dl_bw(struct dl_bw *dl_b);
> --
> 2.25.1
>
On 06/07/2022 12:47, Vincent Guittot wrote:
> On Wed, 29 Jun 2022 at 14:21, Dietmar Eggemann <[email protected]> wrote:
[...]
>> static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
>> {
>> unsigned long cap = arch_scale_cpu_capacity(cpu);
>>
>> - return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
>> + return cap >= p->dl.dl_density >> SCHED_CAPACITY_SHIFT;
>
> There is no direct relation between BW_SHIFT and SCHED_CAPACITY_SHIFT
> and we can change one without modifying the other.
>
> Should you use (BW_SHIFT - SCHED_CAPACITY_SHIFT) instead of SCHED_CAPACITY_SHIFT?
Yes, that's better, similar to cpu_bw_dl(). Thanks!
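For reference, the corrected hunk would then presumably end up looking like
this (just a sketch of the suggested change, not the posted v2):

static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
{
        unsigned long cap = arch_scale_cpu_capacity(cpu);

        return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
}

i.e. the shift is derived from both constants, so the test stays correct even
if one of them is changed independently of the other.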
[...]
Hi,
On 29/06/22 14:20, Dietmar Eggemann wrote:
> While testing cpusets on asymmetric CPU capacity systems I realized that
> the admission control, which kicks in when the cpumask of a cpuset is
> about to be changed, is not capacity-aware. [PATCH 2/3] addresses this
> issue.
>
> Overview:
>
> [PATCH 1/3] - Not part of the DL-related changes but included here for
> convenience: inline helper function around
> static_branch_unlikely(&sched_asym_cpucapacity)
>
> [PATCH 2/3] - Make dl_cpuset_cpumask_can_shrink() capacity-aware.
>
> [PATCH 3/3] - Save a multiplication in dl_task_fits_capacity() by using
> already maintained dl_density.
I had a look as well and, other than Vincent's comment on 3/3, the set
looked good to me. Looking forward to v2. :)
Thanks,
Juri