If the user wants to stop controlling uclamp and let the task inherit
the value from the group, we need a method to reset.
Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
sched_setattr syscall.
The policy is
_CLAMP_RESET => reset both min and max
_CLAMP_RESET | _CLAMP_MIN => reset min value
_CLAMP_RESET | _CLAMP_MAX => reset max value
_CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
Signed-off-by: Yun Hsiang <[email protected]>
---
include/uapi/linux/sched.h | 7 +++++--
kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++-------
2 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..6c823ddb1a1e 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,17 +132,20 @@ struct clone_args {
#define SCHED_FLAG_KEEP_PARAMS 0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
- SCHED_FLAG_UTIL_CLAMP_MAX)
+ SCHED_FLAG_UTIL_CLAMP_MAX | \
+ SCHED_FLAG_UTIL_CLAMP_RESET)
#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
SCHED_FLAG_RECLAIM | \
SCHED_FLAG_DL_OVERRUN | \
SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
+ SCHED_FLAG_UTIL_CLAMP | \
+ SCHED_FLAG_UTIL_CLAMP_RESET)
#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8160ab5263f8..b337f8d3b5ae 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1438,9 +1438,22 @@ static int uclamp_validate(struct task_struct *p,
return 0;
}
+static unsigned int __default_uclamp_value(struct task_struct *p,
+ int clamp_id)
+{
+ if (rt_task(p) && clamp_id == UCLAMP_MIN)
+ return sysctl_sched_uclamp_util_min_rt_default;
+ else
+ return uclamp_none(clamp_id);
+}
+
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
+ int reset = attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET;
+ bool user_defined;
+ unsigned int clamp_value;
+ unsigned long flags = attr->sched_flags & SCHED_FLAG_UTIL_CLAMP;
enum uclamp_id clamp_id;
/*
@@ -1451,7 +1464,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
/* Keep using defined clamps across class changes */
- if (uc_se->user_defined)
+ if (flags != SCHED_FLAG_UTIL_CLAMP_RESET &&
+ uc_se->user_defined)
continue;
/*
@@ -1462,20 +1476,33 @@ static void __setscheduler_uclamp(struct task_struct *p,
__uclamp_update_util_min_rt_default(p);
else
uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
-
}
- if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ if (likely(!flags || flags == SCHED_FLAG_UTIL_CLAMP_RESET))
return;
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ if (flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ if (reset) {
+ clamp_value = __default_uclamp_value(p, UCLAMP_MIN);
+ user_defined = false;
+ } else {
+ clamp_value = attr->sched_util_min;
+ user_defined = true;
+ }
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
- attr->sched_util_min, true);
+ clamp_value, user_defined);
}
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ if (flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ if (reset) {
+ clamp_value = __default_uclamp_value(p, UCLAMP_MAX);
+ user_defined = false;
+ } else {
+ clamp_value = attr->sched_util_max;
+ user_defined = true;
+ }
uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
- attr->sched_util_max, true);
+ clamp_value, user_defined);
}
}
--
2.25.1
On 25/10/2020 08:36, Yun Hsiang wrote:
> If the user wants to stop controlling uclamp and let the task inherit
> the value from the group, we need a method to reset.
>
> Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> sched_setattr syscall.
>
> The policy is
> _CLAMP_RESET => reset both min and max
> _CLAMP_RESET | _CLAMP_MIN => reset min value
> _CLAMP_RESET | _CLAMP_MAX => reset max value
> _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
>
> Signed-off-by: Yun Hsiang <[email protected]>
[...]
> @@ -1451,7 +1464,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
>
> /* Keep using defined clamps across class changes */
> - if (uc_se->user_defined)
> + if (flags != SCHED_FLAG_UTIL_CLAMP_RESET &&
> + uc_se->user_defined)
> continue;
With:
(1) _CLAMP_RESET => reset both min and max
(2) _CLAMP_RESET | _CLAMP_MIN => reset min value
(3) _CLAMP_RESET | _CLAMP_MAX => reset max value
(4) _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
If you reset an RT task with (1) you don't reset its uclamp.min value.
__uclamp_update_util_min_rt_default(p) doesn't know about
SCHED_FLAG_UTIL_CLAMP_RESET. It only knows user_defined and will bail early.
[...]
> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> + if (likely(!flags || flags == SCHED_FLAG_UTIL_CLAMP_RESET))
> return;
>
> - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> + if (flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> + if (reset) {
> + clamp_value = __default_uclamp_value(p, UCLAMP_MIN);
> + user_defined = false;
> + } else {
> + clamp_value = attr->sched_util_min;
> + user_defined = true;
> + }
Why do you reset for (1) in the for_each_clamp_id(clamp_id) loop and for
(2)-(4) in the if (flags & SCHED_FLAG_UTIL_CLAMP_MXX) condition?
You could reset (1)-(4) in the for_each_clamp_id(clamp_id) loop? In this
case you wouldn't need __default_uclamp_value().
[...]
Hi Dietmar,
On Mon, Oct 26, 2020 at 10:47:11AM +0100, Dietmar Eggemann wrote:
> On 25/10/2020 08:36, Yun Hsiang wrote:
> > If the user wants to stop controlling uclamp and let the task inherit
> > the value from the group, we need a method to reset.
> >
> > Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> > sched_setattr syscall.
> >
> > The policy is
> > _CLAMP_RESET => reset both min and max
> > _CLAMP_RESET | _CLAMP_MIN => reset min value
> > _CLAMP_RESET | _CLAMP_MAX => reset max value
> > _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
> >
> > Signed-off-by: Yun Hsiang <[email protected]>
>
> [...]
>
> > @@ -1451,7 +1464,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
> > struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> >
> > /* Keep using defined clamps across class changes */
> > - if (uc_se->user_defined)
> > + if (flags != SCHED_FLAG_UTIL_CLAMP_RESET &&
> > + uc_se->user_defined)
> > continue;
>
> With:
>
> (1) _CLAMP_RESET => reset both min and max
> (2) _CLAMP_RESET | _CLAMP_MIN => reset min value
> (3) _CLAMP_RESET | _CLAMP_MAX => reset max value
> (4) _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
>
> If you reset an RT task with (1) you don't reset its uclamp.min value.
>
> __uclamp_update_util_min_rt_default(p) doesn't know about
> SCHED_FLAG_UTIL_CLAMP_RESET. It only knows user_defined and will bail early.
>
Sorry I didn't notice __uclamp_update_util_min_rt_default will return
directly if user_defined is set, I'll fix it.
> [...]
>
> > - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> > + if (likely(!flags || flags == SCHED_FLAG_UTIL_CLAMP_RESET))
> > return;
> >
> > - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> > + if (flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> > + if (reset) {
> > + clamp_value = __default_uclamp_value(p, UCLAMP_MIN);
> > + user_defined = false;
> > + } else {
> > + clamp_value = attr->sched_util_min;
> > + user_defined = true;
> > + }
>
> Why do you reset for (1) in the for_each_clamp_id(clamp_id) loop and for
> (2)-(4) in the if (flags & SCHED_FLAG_UTIL_CLAMP_MXX) condition?
>
> You could reset (1)-(4) in the for_each_clamp_id(clamp_id) loop? In this
> case you wouldn't need __default_uclamp_value().
Do you mean adding these code in for_each_clamp_id(clamp_id) loop?
if (clamp_id == UCLAMP_MIN) {
if (flags == SCHED_FLAG_UTIL_CLAMP_RESET ||
(reset && (flags || SCHED_FLAG_UTIL_CLAMP_MIN)) ||
!se->user_defined) {
if (task_rt(p)) {
clamp_value = sysctl_sched_uclamp_util_min_rt_default
} else {
clamp_value = uclamp_none(clamp_id);
}
} else
continue;
}
/* similar code for UCLAMP_MAX */
...
uclamp_se_set(uc_se, clamp_value, false);
It seems more clear.
If you think this one is better, I'll use this method and send patch V4.
> [...]
On 26/10/2020 16:45, Yun Hsiang wrote:
> Hi Dietmar,
>
> On Mon, Oct 26, 2020 at 10:47:11AM +0100, Dietmar Eggemann wrote:
>> On 25/10/2020 08:36, Yun Hsiang wrote:
>>> If the user wants to stop controlling uclamp and let the task inherit
>>> the value from the group, we need a method to reset.
>>>
>>> Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
>>> sched_setattr syscall.
>>>
>>> The policy is
>>> _CLAMP_RESET => reset both min and max
>>> _CLAMP_RESET | _CLAMP_MIN => reset min value
>>> _CLAMP_RESET | _CLAMP_MAX => reset max value
>>> _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
>>>
>>> Signed-off-by: Yun Hsiang <[email protected]>
>>
>> [...]
>>
>>> @@ -1451,7 +1464,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
>>> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
>>>
>>> /* Keep using defined clamps across class changes */
>>> - if (uc_se->user_defined)
>>> + if (flags != SCHED_FLAG_UTIL_CLAMP_RESET &&
>>> + uc_se->user_defined)
>>> continue;
>>
>> With:
>>
>> (1) _CLAMP_RESET => reset both min and max
>> (2) _CLAMP_RESET | _CLAMP_MIN => reset min value
>> (3) _CLAMP_RESET | _CLAMP_MAX => reset max value
>> (4) _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
>>
>> If you reset an RT task with (1) you don't reset its uclamp.min value.
>>
>> __uclamp_update_util_min_rt_default(p) doesn't know about
>> SCHED_FLAG_UTIL_CLAMP_RESET. It only knows user_defined and will bail early.
>>
>
> Sorry I didn't notice __uclamp_update_util_min_rt_default will return
> directly if user_defined is set, I'll fix it.
>
>> [...]
>>
>>> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
>>> + if (likely(!flags || flags == SCHED_FLAG_UTIL_CLAMP_RESET))
>>> return;
>>>
>>> - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
>>> + if (flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
>>> + if (reset) {
>>> + clamp_value = __default_uclamp_value(p, UCLAMP_MIN);
>>> + user_defined = false;
>>> + } else {
>>> + clamp_value = attr->sched_util_min;
>>> + user_defined = true;
>>> + }
>>
>> Why do you reset for (1) in the for_each_clamp_id(clamp_id) loop and for
>> (2)-(4) in the if (flags & SCHED_FLAG_UTIL_CLAMP_MXX) condition?
>>
>> You could reset (1)-(4) in the for_each_clamp_id(clamp_id) loop? In this
>> case you wouldn't need __default_uclamp_value().
>
> Do you mean adding these code in for_each_clamp_id(clamp_id) loop?
>
> if (clamp_id == UCLAMP_MIN) {
> if (flags == SCHED_FLAG_UTIL_CLAMP_RESET ||
> (reset && (flags || SCHED_FLAG_UTIL_CLAMP_MIN)) ||
> !se->user_defined) {
> if (task_rt(p)) {
> clamp_value = sysctl_sched_uclamp_util_min_rt_default
> } else {
> clamp_value = uclamp_none(clamp_id);
> }
> } else
> continue;
> }
> /* similar code for UCLAMP_MAX */
> ...
> uclamp_se_set(uc_se, clamp_value, false);
>
> It seems more clear.
> If you think this one is better, I'll use this method and send patch V4.
I thought about something like this. Only lightly tested.
---8<---
From: Dietmar Eggemann <[email protected]>
Date: Mon, 26 Oct 2020 13:52:23 +0100
Subject: [PATCH] SCHED_FLAG_UTIL_CLAMP_RESET
Signed-off-by: Dietmar Eggemann <[email protected]>
---
include/uapi/linux/sched.h | 4 +++-
kernel/sched/core.c | 31 +++++++++++++++++++++++++++----
2 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..0dd890822751 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,12 +132,14 @@ struct clone_args {
#define SCHED_FLAG_KEEP_PARAMS 0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
- SCHED_FLAG_UTIL_CLAMP_MAX)
+ SCHED_FLAG_UTIL_CLAMP_MAX | \
+ SCHED_FLAG_UTIL_CLAMP_RESET)
#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
SCHED_FLAG_RECLAIM | \
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3dc415f58bd7..717b1cf5cf1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1438,6 +1438,23 @@ static int uclamp_validate(struct task_struct *p,
return 0;
}
+static bool uclamp_reset(enum uclamp_id clamp_id, unsigned long flags)
+{
+ if (!(flags & SCHED_FLAG_UTIL_CLAMP_RESET))
+ return false;
+
+ if (!(flags & (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX)))
+ return true;
+
+ if ((flags & SCHED_FLAG_UTIL_CLAMP_MIN) && clamp_id == UCLAMP_MIN)
+ return true;
+
+ if ((flags & SCHED_FLAG_UTIL_CLAMP_MAX) && clamp_id == UCLAMP_MAX)
+ return true;
+
+ return false;
+}
+
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
@@ -1449,24 +1466,30 @@ static void __setscheduler_uclamp(struct task_struct *p,
*/
for_each_clamp_id(clamp_id) {
struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
+ unsigned int value;
/* Keep using defined clamps across class changes */
- if (uc_se->user_defined)
+ if (!uclamp_reset(clamp_id, attr->sched_flags) &&
+ uc_se->user_defined) {
continue;
+ }
/*
* RT by default have a 100% boost value that could be modified
* at runtime.
*/
if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
- __uclamp_update_util_min_rt_default(p);
+ value = sysctl_sched_uclamp_util_min_rt_default;
else
- uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
+ value = uclamp_none(clamp_id);
+ uclamp_se_set(uc_se, value, false);
}
- if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) ||
+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET) {
return;
+ }
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
--
2.17.1
Hi Dietmar,
On Mon, Oct 26, 2020 at 08:00:48PM +0100, Dietmar Eggemann wrote:
> On 26/10/2020 16:45, Yun Hsiang wrote:
> > Hi Dietmar,
> >
> > On Mon, Oct 26, 2020 at 10:47:11AM +0100, Dietmar Eggemann wrote:
> >> On 25/10/2020 08:36, Yun Hsiang wrote:
> >>> If the user wants to stop controlling uclamp and let the task inherit
> >>> the value from the group, we need a method to reset.
> >>>
> >>> Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> >>> sched_setattr syscall.
> >>>
> >>> The policy is
> >>> _CLAMP_RESET => reset both min and max
> >>> _CLAMP_RESET | _CLAMP_MIN => reset min value
> >>> _CLAMP_RESET | _CLAMP_MAX => reset max value
> >>> _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
> >>>
> >>> Signed-off-by: Yun Hsiang <[email protected]>
> >>
> >> [...]
> >>
> >>> @@ -1451,7 +1464,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
> >>> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> >>>
> >>> /* Keep using defined clamps across class changes */
> >>> - if (uc_se->user_defined)
> >>> + if (flags != SCHED_FLAG_UTIL_CLAMP_RESET &&
> >>> + uc_se->user_defined)
> >>> continue;
> >>
> >> With:
> >>
> >> (1) _CLAMP_RESET => reset both min and max
> >> (2) _CLAMP_RESET | _CLAMP_MIN => reset min value
> >> (3) _CLAMP_RESET | _CLAMP_MAX => reset max value
> >> (4) _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
> >>
> >> If you reset an RT task with (1) you don't reset its uclamp.min value.
> >>
> >> __uclamp_update_util_min_rt_default(p) doesn't know about
> >> SCHED_FLAG_UTIL_CLAMP_RESET. It only knows user_defined and will bail early.
> >>
> >
> > Sorry I didn't notice __uclamp_update_util_min_rt_default will return
> > directly if user_defined is set, I'll fix it.
> >
> >> [...]
> >>
> >>> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> >>> + if (likely(!flags || flags == SCHED_FLAG_UTIL_CLAMP_RESET))
> >>> return;
> >>>
> >>> - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> >>> + if (flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> >>> + if (reset) {
> >>> + clamp_value = __default_uclamp_value(p, UCLAMP_MIN);
> >>> + user_defined = false;
> >>> + } else {
> >>> + clamp_value = attr->sched_util_min;
> >>> + user_defined = true;
> >>> + }
> >>
> >> Why do you reset for (1) in the for_each_clamp_id(clamp_id) loop and for
> >> (2)-(4) in the if (flags & SCHED_FLAG_UTIL_CLAMP_MXX) condition?
> >>
> >> You could reset (1)-(4) in the for_each_clamp_id(clamp_id) loop? In this
> >> case you wouldn't need __default_uclamp_value().
> >
> > Do you mean adding these code in for_each_clamp_id(clamp_id) loop?
> >
> > if (clamp_id == UCLAMP_MIN) {
> > if (flags == SCHED_FLAG_UTIL_CLAMP_RESET ||
> > (reset && (flags || SCHED_FLAG_UTIL_CLAMP_MIN)) ||
> > !se->user_defined) {
> > if (task_rt(p)) {
> > clamp_value = sysctl_sched_uclamp_util_min_rt_default
> > } else {
> > clamp_value = uclamp_none(clamp_id);
> > }
> > } else
> > continue;
> > }
> > /* similar code for UCLAMP_MAX */
> > ...
> > uclamp_se_set(uc_se, clamp_value, false);
> >
> > It seems more clear.
> > If you think this one is better, I'll use this method and send patch V4.
>
> I thought about something like this. Only lightly tested.
>
> ---8<---
>
> From: Dietmar Eggemann <[email protected]>
> Date: Mon, 26 Oct 2020 13:52:23 +0100
> Subject: [PATCH] SCHED_FLAG_UTIL_CLAMP_RESET
>
> Signed-off-by: Dietmar Eggemann <[email protected]>
> ---
> include/uapi/linux/sched.h | 4 +++-
> kernel/sched/core.c | 31 +++++++++++++++++++++++++++----
> 2 files changed, 30 insertions(+), 5 deletions(-)
>
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 3bac0a8ceab2..0dd890822751 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -132,12 +132,14 @@ struct clone_args {
> #define SCHED_FLAG_KEEP_PARAMS 0x10
> #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
>
> #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> SCHED_FLAG_KEEP_PARAMS)
>
> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> - SCHED_FLAG_UTIL_CLAMP_MAX)
> + SCHED_FLAG_UTIL_CLAMP_MAX | \
> + SCHED_FLAG_UTIL_CLAMP_RESET)
>
> #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
> SCHED_FLAG_RECLAIM | \
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 3dc415f58bd7..717b1cf5cf1f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1438,6 +1438,23 @@ static int uclamp_validate(struct task_struct *p,
> return 0;
> }
>
> +static bool uclamp_reset(enum uclamp_id clamp_id, unsigned long flags)
> +{
> + if (!(flags & SCHED_FLAG_UTIL_CLAMP_RESET))
> + return false;
> +
> + if (!(flags & (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX)))
> + return true;
> +
> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MIN) && clamp_id == UCLAMP_MIN)
> + return true;
> +
> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MAX) && clamp_id == UCLAMP_MAX)
> + return true;
> +
> + return false;
> +}
> +
> static void __setscheduler_uclamp(struct task_struct *p,
> const struct sched_attr *attr)
> {
> @@ -1449,24 +1466,30 @@ static void __setscheduler_uclamp(struct task_struct *p,
> */
> for_each_clamp_id(clamp_id) {
> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> + unsigned int value;
>
> /* Keep using defined clamps across class changes */
> - if (uc_se->user_defined)
> + if (!uclamp_reset(clamp_id, attr->sched_flags) &&
> + uc_se->user_defined) {
> continue;
> + }
>
> /*
> * RT by default have a 100% boost value that could be modified
> * at runtime.
> */
> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> - __uclamp_update_util_min_rt_default(p);
> + value = sysctl_sched_uclamp_util_min_rt_default;
> else
> - uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> + value = uclamp_none(clamp_id);
>
> + uclamp_se_set(uc_se, value, false);
> }
>
> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) ||
> + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET) {
> return;
> + }
>
> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
Got it. This is much better. I'll test and send patch V4.
Thank for review and suggestions!
> --
> 2.17.1
>
>
>
Best Regards,
Yun
Hi Dietmar, Yun,
I hope I'm not too late before v4 posting ;)
I think the overall approach is sound, I just added in a couple of
cleanups and a possible fix (user_defined reset).
Best,
Patrick
On Tue, Oct 27, 2020 at 16:58:13 +0100, Yun Hsiang <[email protected]> wrote...
> Hi Diet mar,
> On Mon, Oct 26, 2020 at 08:00:48PM +0100, Dietmar Eggemann wrote:
>> On 26/10/2020 16:45, Yun Hsiang wrote:
[...]
>> I thought about something like this. Only lightly tested.
>>
>> ---8<---
>>
>> From: Dietmar Eggemann <[email protected]>
>> Date: Mon, 26 Oct 2020 13:52:23 +0100
>> Subject: [PATCH] SCHED_FLAG_UTIL_CLAMP_RESET
>>
>> Signed-off-by: Dietmar Eggemann <[email protected]>
>> ---
>> include/uapi/linux/sched.h | 4 +++-
>> kernel/sched/core.c | 31 +++++++++++++++++++++++++++----
>> 2 files changed, 30 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
>> index 3bac0a8ceab2..0dd890822751 100644
>> --- a/include/uapi/linux/sched.h
>> +++ b/include/uapi/linux/sched.h
>> @@ -132,12 +132,14 @@ struct clone_args {
>> #define SCHED_FLAG_KEEP_PARAMS 0x10
>> #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
>> #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
>> +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
>>
>> #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
>> SCHED_FLAG_KEEP_PARAMS)
>>
>> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
>> - SCHED_FLAG_UTIL_CLAMP_MAX)
>> + SCHED_FLAG_UTIL_CLAMP_MAX | \
>> + SCHED_FLAG_UTIL_CLAMP_RESET)
>>
>> #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
>> SCHED_FLAG_RECLAIM | \
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 3dc415f58bd7..717b1cf5cf1f 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -1438,6 +1438,23 @@ static int uclamp_validate(struct task_struct *p,
>> return 0;
>> }
>>
>> +static bool uclamp_reset(enum uclamp_id clamp_id, unsigned long flags)
>> +{
Maybe we can add in some comments?
/* No _UCLAMP_RESET flag set: do not reset */
>> + if (!(flags & SCHED_FLAG_UTIL_CLAMP_RESET))
>> + return false;
>> +
/* Only _UCLAMP_RESET flag set: reset both clamps */
>> + if (!(flags & (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX)))
>> + return true;
>> +
/* Both _UCLAMP_RESET and _UCLAMP_MIN flags are set: reset only min */
>> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MIN) && clamp_id == UCLAMP_MIN)
>> + return true;
>> +
/* Both _UCLAMP_RESET and _UCLAMP_MAX flags are set: reset only max */
>> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MAX) && clamp_id == UCLAMP_MAX)
>> + return true;
Since the evaluation ordering is important, do we have to better
_always_ use a READ_ONCE() for all flags accesses above, to ensure it is
preserved?
>> +
>> + return false;
>> +}
>> +
>> static void __setscheduler_uclamp(struct task_struct *p,
>> const struct sched_attr *attr)
>> {
>> @@ -1449,24 +1466,30 @@ static void __setscheduler_uclamp(struct task_struct *p,
>> */
Perhaps we should update the comment above this loop with something
like:
/*
* Reset to default clamps on forced _UCLAMP_RESET (always) and
* for tasks without a task-specific value (on scheduling class change).
*/
>> for_each_clamp_id(clamp_id) {
>> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
>> + unsigned int value;
>>
>> /* Keep using defined clamps across class changes */
>> - if (uc_se->user_defined)
>> + if (!uclamp_reset(clamp_id, attr->sched_flags) &&
>> + uc_se->user_defined) {
>> continue;
>> + }
I think we miss to reset the user_defined flag here.
What about replacing the above chunk with:
if (uclamp_reset(clamp_id, attr->sched_flags))
uc_se->user_defined = false;
if (uc-se->user_defined)
continue;
?
>>
>> /*
>> * RT by default have a 100% boost value that could be modified
>> * at runtime.
>> */
>> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
>> - __uclamp_update_util_min_rt_default(p);
>> + value = sysctl_sched_uclamp_util_min_rt_default;
By removing this usage of __uclamp_updadate_util_min_rt_default(p),
the only other usage remaining is the call from:
uclamp_udpate_util_min_rt_default().
What about an additional cleanup by in-lining the only surviving usage?
>> else
>> - uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
>> + value = uclamp_none(clamp_id);
>>
>> + uclamp_se_set(uc_se, value, false);
>> }
>>
>> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
>> + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) ||
>> + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET) {
The likely() above should not wrap both conditions to be effective?
>> return;
>> + }
>>
>> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
>> uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
On Wed, Oct 28, 2020 at 12:39:43 +0100, Qais Yousef <[email protected]> wrote...
> On 10/28/20 11:11, Patrick Bellasi wrote:
>> >>
>> >> /*
>> >> * RT by default have a 100% boost value that could be modified
>> >> * at runtime.
>> >> */
>> >> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
>> >> - __uclamp_update_util_min_rt_default(p);
>> >> + value = sysctl_sched_uclamp_util_min_rt_default;
>>
>> By removing this usage of __uclamp_updadate_util_min_rt_default(p),
>> the only other usage remaining is the call from:
>> uclamp_udpate_util_min_rt_default().
>>
>> What about an additional cleanup by in-lining the only surviving usage?
>
> This is not a cleanup IMO. There is special rule about updating that are
> encoded and documented in this helper function. Namely:
>
> * p->pi_lock must be held.
> * p->uclamp_req[].user_defined must be false.
Both these conditions are satisfied in the above call site:
- user_defined is tested just 4 lines above
- pi_lock is taken by the caller, i.e. __sched_setscheduler()
Thus, there is no need to test them two times.
Moreover, the same granted pi_lock you check in
__ucalmp_update_util_min_rt_default() is not checked at all in the rest
of __setscheduler_uclamp().
Thus, perhaps we should have just avoided to add
__uclamp_update_util_min_rt_default() since the beginning and:
- have all its logic in the _only_ place where it's required
- added the lockdep_assert_held() in __setscheduler_uclamp()
That's why I consider this a very good cleanup opportunity.
> I don't see open coding helps but rather makes the code harder to read and
> prone to introduce bugs if anything gets reshuffled in the future.
It's not open coding IMHO, it's just adding the code that's required.
Hi Patrick,
On Wed, Oct 28, 2020 at 11:11:07AM +0100, Patrick Bellasi wrote:
>
> Hi Dietmar, Yun,
> I hope I'm not too late before v4 posting ;)
>
> I think the overall approach is sound, I just added in a couple of
> cleanups and a possible fix (user_defined reset).
>
> Best,
> Patrick
>
>
> On Tue, Oct 27, 2020 at 16:58:13 +0100, Yun Hsiang <[email protected]> wrote...
>
> > Hi Diet mar,
> > On Mon, Oct 26, 2020 at 08:00:48PM +0100, Dietmar Eggemann wrote:
> >> On 26/10/2020 16:45, Yun Hsiang wrote:
>
> [...]
>
> >> I thought about something like this. Only lightly tested.
> >>
> >> ---8<---
> >>
> >> From: Dietmar Eggemann <[email protected]>
> >> Date: Mon, 26 Oct 2020 13:52:23 +0100
> >> Subject: [PATCH] SCHED_FLAG_UTIL_CLAMP_RESET
> >>
> >> Signed-off-by: Dietmar Eggemann <[email protected]>
> >> ---
> >> include/uapi/linux/sched.h | 4 +++-
> >> kernel/sched/core.c | 31 +++++++++++++++++++++++++++----
> >> 2 files changed, 30 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> >> index 3bac0a8ceab2..0dd890822751 100644
> >> --- a/include/uapi/linux/sched.h
> >> +++ b/include/uapi/linux/sched.h
> >> @@ -132,12 +132,14 @@ struct clone_args {
> >> #define SCHED_FLAG_KEEP_PARAMS 0x10
> >> #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> >> #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> >> +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
> >>
> >> #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> >> SCHED_FLAG_KEEP_PARAMS)
> >>
> >> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> >> - SCHED_FLAG_UTIL_CLAMP_MAX)
> >> + SCHED_FLAG_UTIL_CLAMP_MAX | \
> >> + SCHED_FLAG_UTIL_CLAMP_RESET)
> >>
> >> #define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
> >> SCHED_FLAG_RECLAIM | \
> >> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> >> index 3dc415f58bd7..717b1cf5cf1f 100644
> >> --- a/kernel/sched/core.c
> >> +++ b/kernel/sched/core.c
> >> @@ -1438,6 +1438,23 @@ static int uclamp_validate(struct task_struct *p,
> >> return 0;
> >> }
> >>
> >> +static bool uclamp_reset(enum uclamp_id clamp_id, unsigned long flags)
> >> +{
>
> Maybe we can add in some comments?
>
I'll add these comment.
>
> /* No _UCLAMP_RESET flag set: do not reset */
> >> + if (!(flags & SCHED_FLAG_UTIL_CLAMP_RESET))
> >> + return false;
> >> +
>
> /* Only _UCLAMP_RESET flag set: reset both clamps */
> >> + if (!(flags & (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX)))
> >> + return true;
> >> +
> /* Both _UCLAMP_RESET and _UCLAMP_MIN flags are set: reset only min */
> >> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MIN) && clamp_id == UCLAMP_MIN)
> >> + return true;
> >> +
>
> /* Both _UCLAMP_RESET and _UCLAMP_MAX flags are set: reset only max */
> >> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MAX) && clamp_id == UCLAMP_MAX)
> >> + return true;
>
> Since the evaluation ordering is important, do we have to better
> _always_ use a READ_ONCE() for all flags accesses above, to ensure it is
> preserved?
>
Is this mean that we want to use READ_ONCE to avoid compiler reordering these
conditions?
> >> +
> >> + return false;
> >> +}
> >> +
> >> static void __setscheduler_uclamp(struct task_struct *p,
> >> const struct sched_attr *attr)
> >> {
> >> @@ -1449,24 +1466,30 @@ static void __setscheduler_uclamp(struct task_struct *p,
> >> */
>
> Perhaps we should update the comment above this loop with something
> like:
>
> /*
> * Reset to default clamps on forced _UCLAMP_RESET (always) and
> * for tasks without a task-specific value (on scheduling class change).
> */
> >> for_each_clamp_id(clamp_id) {
> >> struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> >> + unsigned int value;
> >>
> >> /* Keep using defined clamps across class changes */
> >> - if (uc_se->user_defined)
> >> + if (!uclamp_reset(clamp_id, attr->sched_flags) &&
> >> + uc_se->user_defined) {
> >> continue;
> >> + }
>
> I think we miss to reset the user_defined flag here.
>
> What about replacing the above chunk with:
>
> if (uclamp_reset(clamp_id, attr->sched_flags))
> uc_se->user_defined = false;
> if (uc-se->user_defined)
> continue;
>
> ?
user_defined flag will be reset later by uclamp_se_set(uc_se, value,
false). But I agree to split it to two condition because it seems
clearer.
>
>
> >>
> >> /*
> >> * RT by default have a 100% boost value that could be modified
> >> * at runtime.
> >> */
> >> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> >> - __uclamp_update_util_min_rt_default(p);
> >> + value = sysctl_sched_uclamp_util_min_rt_default;
>
> By removing this usage of __uclamp_updadate_util_min_rt_default(p),
> the only other usage remaining is the call from:
> uclamp_udpate_util_min_rt_default().
>
> What about an additional cleanup by in-lining the only surviving usage?
>
>
> >> else
> >> - uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> >> + value = uclamp_none(clamp_id);
> >>
> >> + uclamp_se_set(uc_se, value, false);
> >> }
> >>
> >> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> >> + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) ||
> >> + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET) {
>
> The likely() above should not wrap both conditions to be effective?
Got it.
>
> >> return;
> >> + }
> >>
> >> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> >> uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
On 10/28/20 19:03, Patrick Bellasi wrote:
>
> On Wed, Oct 28, 2020 at 12:39:43 +0100, Qais Yousef <[email protected]> wrote...
>
> > On 10/28/20 11:11, Patrick Bellasi wrote:
> >> >>
> >> >> /*
> >> >> * RT by default have a 100% boost value that could be modified
> >> >> * at runtime.
> >> >> */
> >> >> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> >> >> - __uclamp_update_util_min_rt_default(p);
> >> >> + value = sysctl_sched_uclamp_util_min_rt_default;
> >>
> >> By removing this usage of __uclamp_updadate_util_min_rt_default(p),
> >> the only other usage remaining is the call from:
> >> uclamp_udpate_util_min_rt_default().
> >>
> >> What about an additional cleanup by in-lining the only surviving usage?
> >
> > This is not a cleanup IMO. There is special rule about updating that are
> > encoded and documented in this helper function. Namely:
> >
> > * p->pi_lock must be held.
> > * p->uclamp_req[].user_defined must be false.
>
> Both these conditions are satisfied in the above call site:
> - user_defined is tested just 4 lines above
> - pi_lock is taken by the caller, i.e. __sched_setscheduler()
> Thus, there is no need to test them two times.
> Moreover, the same granted pi_lock you check in
> __ucalmp_update_util_min_rt_default() is not checked at all in the rest
> of __setscheduler_uclamp().
Updating the default rt value is done from different contexts. Hence it is
important to document the rules under which this update must happen and ensure
the update happens through a common path.
__setscheduler_uclamp() is not called from 2 different contexts.
> Thus, perhaps we should have just avoided to add
> __uclamp_update_util_min_rt_default() since the beginning and:
> - have all its logic in the _only_ place where it's required
> - added the lockdep_assert_held() in __setscheduler_uclamp()
>
> That's why I consider this a very good cleanup opportunity.
I disagree. This is unnecessary churn.
Thanks
--
Qais Yousef
> > I don't see open coding helps but rather makes the code harder to read and
> > prone to introduce bugs if anything gets reshuffled in the future.
>
> It's not open coding IMHO, it's just adding the code that's required.
On 10/28/20 11:11, Patrick Bellasi wrote:
> >>
> >> /*
> >> * RT by default have a 100% boost value that could be modified
> >> * at runtime.
> >> */
> >> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> >> - __uclamp_update_util_min_rt_default(p);
> >> + value = sysctl_sched_uclamp_util_min_rt_default;
>
> By removing this usage of __uclamp_updadate_util_min_rt_default(p),
> the only other usage remaining is the call from:
> uclamp_udpate_util_min_rt_default().
>
> What about an additional cleanup by in-lining the only surviving usage?
This is not a cleanup IMO. There is special rule about updating that are
encoded and documented in this helper function. Namely:
* p->pi_lock must be held.
* p->uclamp_req[].user_defined must be false.
I don't see open coding helps but rather makes the code harder to read and
prone to introduce bugs if anything gets reshuffled in the future.
Thanks
--
Qais Yousef
Hi Yun
Sorry for chipping in late.
On 10/25/20 15:36, Yun Hsiang wrote:
> If the user wants to stop controlling uclamp and let the task inherit
> the value from the group, we need a method to reset.
>
> Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> sched_setattr syscall.
>
> The policy is
> _CLAMP_RESET => reset both min and max
> _CLAMP_RESET | _CLAMP_MIN => reset min value
> _CLAMP_RESET | _CLAMP_MAX => reset max value
> _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
>
> Signed-off-by: Yun Hsiang <[email protected]>
> ---
> include/uapi/linux/sched.h | 7 +++++--
> kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++-------
> 2 files changed, 39 insertions(+), 9 deletions(-)
>
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 3bac0a8ceab2..6c823ddb1a1e 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -132,17 +132,20 @@ struct clone_args {
> #define SCHED_FLAG_KEEP_PARAMS 0x10
> #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
>
> #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> SCHED_FLAG_KEEP_PARAMS)
>
> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> - SCHED_FLAG_UTIL_CLAMP_MAX)
> + SCHED_FLAG_UTIL_CLAMP_MAX | \
> + SCHED_FLAG_UTIL_CLAMP_RESET)
Is it safe to change this define in a uapi header without a potential
consequence?
FWIW I still have concerns about this approach. We're doing a reset to control
cgroup behavior, I don't see any correlation between the two. Besides the
difference between RESET and setting uclamp_min=0 without RESET is not obvious
nor intuitive for someone who didn't look at the code.
I propose something like the below which is more explicit about what is being
requested and delivered here. And if we decide to deprecate this behavior,
it'd be much easier to just ignore this flag.
You must set this flag with your uclamp request to retain the cgroup
inheritance behavior. If the flag is not set, we automatically clear it.
Only compile tested.
Thanks
--
Qais Yousef
--------->8-----------
From fe48fde7dea582b3b075e80e6936e1199f24363d Mon Sep 17 00:00:00 2001
From: Qais Yousef <[email protected]>
Date: Wed, 28 Oct 2020 22:36:26 +0000
Subject: [PATCH] sched/uclamp: Add new flag to control cgroup behavior
Signed-off-by: Qais Yousef <[email protected]>
---
include/linux/sched.h | 1 +
include/uapi/linux/sched.h | 6 ++++++
kernel/sched/core.c | 25 ++++++++++++++++---------
3 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..6eb35dfaa893 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,7 @@ struct uclamp_se {
unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
unsigned int active : 1;
unsigned int user_defined : 1;
+ unsigned int follow_cgroup : 1;
};
#endif /* CONFIG_UCLAMP_TASK */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..33ff716a7574 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,6 +132,12 @@ struct clone_args {
#define SCHED_FLAG_KEEP_PARAMS 0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+/*
+ * Control whether a task follows/inherits the cgroup uclamp.min/max or not.
+ * By default this flag is set for all tasks. Any explicit modification to a
+ * task's UCLAMP_MIN/MAX must set this flag to retain the bahavior.
+ */
+#define SCHED_FLAG_UTIL_FOLLOW_CGROUP 0x80
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d95dc3f4644..1c77d6b8bd96 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1083,7 +1083,7 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
return uc_req;
uc_max = task_group(p)->uclamp[clamp_id];
- if (uc_req.value > uc_max.value || !uc_req.user_defined)
+ if (uc_req.value > uc_max.value || uc_req.follow_cgroup)
return uc_max;
#endif
@@ -1446,6 +1446,7 @@ static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
enum uclamp_id clamp_id;
+ bool follow_cgroup;
/*
* On scheduling class change, reset to default clamps for tasks
@@ -1472,14 +1473,18 @@ static void __setscheduler_uclamp(struct task_struct *p,
if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
return;
+ follow_cgroup = attr->sched_flags & SCHED_FLAG_UTIL_FOLLOW_CGROUP;
+
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
- uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
- attr->sched_util_min, true);
+ struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MIN];
+ uc_se->follow_cgroup = follow_cgroup;
+ uclamp_se_set(uc_se, attr->sched_util_min, true);
}
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
- uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
- attr->sched_util_max, true);
+ struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MAX];
+ uc_se->follow_cgroup = follow_cgroup;
+ uclamp_se_set(uc_se, attr->sched_util_max, true);
}
}
@@ -1498,8 +1503,9 @@ static void uclamp_fork(struct task_struct *p)
return;
for_each_clamp_id(clamp_id) {
- uclamp_se_set(&p->uclamp_req[clamp_id],
- uclamp_none(clamp_id), false);
+ struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
+ uc_se->follow_cgroup = true;
+ uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
}
}
@@ -1532,8 +1538,9 @@ static void __init init_uclamp(void)
init_uclamp_rq(cpu_rq(cpu));
for_each_clamp_id(clamp_id) {
- uclamp_se_set(&init_task.uclamp_req[clamp_id],
- uclamp_none(clamp_id), false);
+ struct uclamp_se *uc_se = &init_task.uclamp_req[clamp_id];
+ uc_se->follow_cgroup = true;
+ uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
}
/* System defaults allow max clamp values for both indexes */
--
2.25.1
On 28/10/2020 19:41, Yun Hsiang wrote:
> Hi Patrick,
>
> On Wed, Oct 28, 2020 at 11:11:07AM +0100, Patrick Bellasi wrote:
[...]
>> On Tue, Oct 27, 2020 at 16:58:13 +0100, Yun Hsiang <[email protected]> wrote...
>>
>>> Hi Diet mar,
>>> On Mon, Oct 26, 2020 at 08:00:48PM +0100, Dietmar Eggemann wrote:
>>>> On 26/10/2020 16:45, Yun Hsiang wrote:
[...]
>>>> From: Dietmar Eggemann <[email protected]>
[...]
>>>> +static bool uclamp_reset(enum uclamp_id clamp_id, unsigned long flags)
>>>> +{
>>
>> Maybe we can add in some comments?
>>
> I'll add these comment.
Yeah, why not.
>> /* No _UCLAMP_RESET flag set: do not reset */
>>>> + if (!(flags & SCHED_FLAG_UTIL_CLAMP_RESET))
>>>> + return false;
>>>> +
>>
>> /* Only _UCLAMP_RESET flag set: reset both clamps */
>>>> + if (!(flags & (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX)))
>>>> + return true;
>>>> +
>> /* Both _UCLAMP_RESET and _UCLAMP_MIN flags are set: reset only min */
>>>> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MIN) && clamp_id == UCLAMP_MIN)
>>>> + return true;
>>>> +
>>
>> /* Both _UCLAMP_RESET and _UCLAMP_MAX flags are set: reset only max */
>>>> + if ((flags & SCHED_FLAG_UTIL_CLAMP_MAX) && clamp_id == UCLAMP_MAX)
>>>> + return true;
>>
>> Since the evaluation ordering is important, do we have to better
>> _always_ use a READ_ONCE() for all flags accesses above, to ensure it is
>> preserved?
>>
>
> Is this mean that we want to use READ_ONCE to avoid compiler reordering these
> conditions?
Why would you need a READ_ONCE() on flags here?
[...]
>>>> /* Keep using defined clamps across class changes */
>>>> - if (uc_se->user_defined)
>>>> + if (!uclamp_reset(clamp_id, attr->sched_flags) &&
>>>> + uc_se->user_defined) {
>>>> continue;
>>>> + }
>>
>> I think we miss to reset the user_defined flag here.
>>
>> What about replacing the above chunk with:
>>
>> if (uclamp_reset(clamp_id, attr->sched_flags))
>> uc_se->user_defined = false;
>> if (uc-se->user_defined)
>> continue;
>>
>> ?
>
> user_defined flag will be reset later by uclamp_se_set(uc_se, value,
> false). But I agree to split it to two condition because it seems
> clearer.
IMHO it's more elegant to use uclamp_reset() in the condition next to
uc-se->user_defined and let uclamp_se_set() set uc-se->user_defined to
false later.
>>>> /*
>>>> * RT by default have a 100% boost value that could be modified
>>>> * at runtime.
>>>> */
>>>> if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
>>>> - __uclamp_update_util_min_rt_default(p);
>>>> + value = sysctl_sched_uclamp_util_min_rt_default;
>>
>> By removing this usage of __uclamp_updadate_util_min_rt_default(p),
>> the only other usage remaining is the call from:
>> uclamp_udpate_util_min_rt_default().
>>
>> What about an additional cleanup by in-lining the only surviving usage?
Don't see why not.
>>>> else
>>>> - uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
>>>> + value = uclamp_none(clamp_id);
>>>>
>>>> + uclamp_se_set(uc_se, value, false);
>>>> }
>>>>
>>>> - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
>>>> + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) ||
>>>> + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET) {
>>
>> The likely() above should not wrap both conditions to be effective?
>
> Got it.
I thought the likely is for no uclamp activities, i.e. policy change.
And a uclamp reset is different to a policy change. But is this likely too?
Hi Qais,
On Thu, Oct 29, 2020 at 11:08:18AM +0000, Qais Yousef wrote:
> Hi Yun
>
> Sorry for chipping in late.
>
> On 10/25/20 15:36, Yun Hsiang wrote:
> > If the user wants to stop controlling uclamp and let the task inherit
> > the value from the group, we need a method to reset.
> >
> > Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> > sched_setattr syscall.
> >
> > The policy is
> > _CLAMP_RESET => reset both min and max
> > _CLAMP_RESET | _CLAMP_MIN => reset min value
> > _CLAMP_RESET | _CLAMP_MAX => reset max value
> > _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
> >
> > Signed-off-by: Yun Hsiang <[email protected]>
> > ---
> > include/uapi/linux/sched.h | 7 +++++--
> > kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++-------
> > 2 files changed, 39 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> > index 3bac0a8ceab2..6c823ddb1a1e 100644
> > --- a/include/uapi/linux/sched.h
> > +++ b/include/uapi/linux/sched.h
> > @@ -132,17 +132,20 @@ struct clone_args {
> > #define SCHED_FLAG_KEEP_PARAMS 0x10
> > #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> > #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> > +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
> >
> > #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> > SCHED_FLAG_KEEP_PARAMS)
> >
> > #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> > - SCHED_FLAG_UTIL_CLAMP_MAX)
> > + SCHED_FLAG_UTIL_CLAMP_MAX | \
> > + SCHED_FLAG_UTIL_CLAMP_RESET)
>
> Is it safe to change this define in a uapi header without a potential
> consequence?
>
> FWIW I still have concerns about this approach. We're doing a reset to control
> cgroup behavior, I don't see any correlation between the two. Besides the
> difference between RESET and setting uclamp_min=0 without RESET is not obvious
> nor intuitive for someone who didn't look at the code.
>
> I propose something like the below which is more explicit about what is being
> requested and delivered here. And if we decide to deprecate this behavior,
> it'd be much easier to just ignore this flag.
>
> You must set this flag with your uclamp request to retain the cgroup
> inheritance behavior. If the flag is not set, we automatically clear it.
I think this behavior may not meet android requirement. Becasue in
android there is group like top-app. And we want to boost the
group by setting group uclamp_min. If group inheritance is explicit, we
need to set this flag for all the tasks in top-app. This might be
costly.
>
> Only compile tested.
>
> Thanks
>
> --
> Qais Yousef
>
>
> --------->8-----------
>
> From fe48fde7dea582b3b075e80e6936e1199f24363d Mon Sep 17 00:00:00 2001
> From: Qais Yousef <[email protected]>
> Date: Wed, 28 Oct 2020 22:36:26 +0000
> Subject: [PATCH] sched/uclamp: Add new flag to control cgroup behavior
>
> Signed-off-by: Qais Yousef <[email protected]>
> ---
> include/linux/sched.h | 1 +
> include/uapi/linux/sched.h | 6 ++++++
> kernel/sched/core.c | 25 ++++++++++++++++---------
> 3 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index afe01e232935..6eb35dfaa893 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -604,6 +604,7 @@ struct uclamp_se {
> unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
> unsigned int active : 1;
> unsigned int user_defined : 1;
> + unsigned int follow_cgroup : 1;
> };
> #endif /* CONFIG_UCLAMP_TASK */
>
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 3bac0a8ceab2..33ff716a7574 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -132,6 +132,12 @@ struct clone_args {
> #define SCHED_FLAG_KEEP_PARAMS 0x10
> #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> +/*
> + * Control whether a task follows/inherits the cgroup uclamp.min/max or not.
> + * By default this flag is set for all tasks. Any explicit modification to a
> + * task's UCLAMP_MIN/MAX must set this flag to retain the bahavior.
> + */
> +#define SCHED_FLAG_UTIL_FOLLOW_CGROUP 0x80
>
> #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> SCHED_FLAG_KEEP_PARAMS)
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 2d95dc3f4644..1c77d6b8bd96 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1083,7 +1083,7 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
> return uc_req;
>
> uc_max = task_group(p)->uclamp[clamp_id];
> - if (uc_req.value > uc_max.value || !uc_req.user_defined)
> + if (uc_req.value > uc_max.value || uc_req.follow_cgroup)
> return uc_max;
> #endif
>
> @@ -1446,6 +1446,7 @@ static void __setscheduler_uclamp(struct task_struct *p,
> const struct sched_attr *attr)
> {
> enum uclamp_id clamp_id;
> + bool follow_cgroup;
>
> /*
> * On scheduling class change, reset to default clamps for tasks
> @@ -1472,14 +1473,18 @@ static void __setscheduler_uclamp(struct task_struct *p,
> if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> return;
>
> + follow_cgroup = attr->sched_flags & SCHED_FLAG_UTIL_FOLLOW_CGROUP;
> +
> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> - uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
> - attr->sched_util_min, true);
> + struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MIN];
> + uc_se->follow_cgroup = follow_cgroup;
> + uclamp_se_set(uc_se, attr->sched_util_min, true);
> }
>
> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
> - uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
> - attr->sched_util_max, true);
> + struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MAX];
> + uc_se->follow_cgroup = follow_cgroup;
> + uclamp_se_set(uc_se, attr->sched_util_max, true);
> }
> }
>
> @@ -1498,8 +1503,9 @@ static void uclamp_fork(struct task_struct *p)
> return;
>
> for_each_clamp_id(clamp_id) {
> - uclamp_se_set(&p->uclamp_req[clamp_id],
> - uclamp_none(clamp_id), false);
> + struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> + uc_se->follow_cgroup = true;
> + uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> }
> }
>
> @@ -1532,8 +1538,9 @@ static void __init init_uclamp(void)
> init_uclamp_rq(cpu_rq(cpu));
>
> for_each_clamp_id(clamp_id) {
> - uclamp_se_set(&init_task.uclamp_req[clamp_id],
> - uclamp_none(clamp_id), false);
> + struct uclamp_se *uc_se = &init_task.uclamp_req[clamp_id];
> + uc_se->follow_cgroup = true;
> + uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> }
>
> /* System defaults allow max clamp values for both indexes */
> --
> 2.25.1
On 10/29/20 21:02, Yun Hsiang wrote:
> Hi Qais,
>
> On Thu, Oct 29, 2020 at 11:08:18AM +0000, Qais Yousef wrote:
> > Hi Yun
> >
> > Sorry for chipping in late.
> >
> > On 10/25/20 15:36, Yun Hsiang wrote:
> > > If the user wants to stop controlling uclamp and let the task inherit
> > > the value from the group, we need a method to reset.
> > >
> > > Add SCHED_FLAG_UTIL_CLAMP_RESET flag to allow the user to reset uclamp via
> > > sched_setattr syscall.
> > >
> > > The policy is
> > > _CLAMP_RESET => reset both min and max
> > > _CLAMP_RESET | _CLAMP_MIN => reset min value
> > > _CLAMP_RESET | _CLAMP_MAX => reset max value
> > > _CLAMP_RESET | _CLAMP_MIN | _CLAMP_MAX => reset both min and max
> > >
> > > Signed-off-by: Yun Hsiang <[email protected]>
> > > ---
> > > include/uapi/linux/sched.h | 7 +++++--
> > > kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++-------
> > > 2 files changed, 39 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> > > index 3bac0a8ceab2..6c823ddb1a1e 100644
> > > --- a/include/uapi/linux/sched.h
> > > +++ b/include/uapi/linux/sched.h
> > > @@ -132,17 +132,20 @@ struct clone_args {
> > > #define SCHED_FLAG_KEEP_PARAMS 0x10
> > > #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> > > #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> > > +#define SCHED_FLAG_UTIL_CLAMP_RESET 0x80
> > >
> > > #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> > > SCHED_FLAG_KEEP_PARAMS)
> > >
> > > #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> > > - SCHED_FLAG_UTIL_CLAMP_MAX)
> > > + SCHED_FLAG_UTIL_CLAMP_MAX | \
> > > + SCHED_FLAG_UTIL_CLAMP_RESET)
> >
> > Is it safe to change this define in a uapi header without a potential
> > consequence?
> >
> > FWIW I still have concerns about this approach. We're doing a reset to control
> > cgroup behavior, I don't see any correlation between the two. Besides the
> > difference between RESET and setting uclamp_min=0 without RESET is not obvious
> > nor intuitive for someone who didn't look at the code.
> >
> > I propose something like the below which is more explicit about what is being
> > requested and delivered here. And if we decide to deprecate this behavior,
> > it'd be much easier to just ignore this flag.
> >
> > You must set this flag with your uclamp request to retain the cgroup
> > inheritance behavior. If the flag is not set, we automatically clear it.
>
> I think this behavior may not meet android requirement. Becasue in
> android there is group like top-app. And we want to boost the
> group by setting group uclamp_min. If group inheritance is explicit, we
> need to set this flag for all the tasks in top-app. This might be
> costly.
You will not have to set it for every task. It's on by default like it works
now. This behavior doesn't change.
But if you change the uclamp value of a task but still want it to continue to
inherit the cgroup values if it's attached to one, you must set this flag when
changing the uclamp value.
Thanks
--
Qais Yousef
>
> >
> > Only compile tested.
> >
> > Thanks
> >
> > --
> > Qais Yousef
> >
> >
> > --------->8-----------
> >
> > From fe48fde7dea582b3b075e80e6936e1199f24363d Mon Sep 17 00:00:00 2001
> > From: Qais Yousef <[email protected]>
> > Date: Wed, 28 Oct 2020 22:36:26 +0000
> > Subject: [PATCH] sched/uclamp: Add new flag to control cgroup behavior
> >
> > Signed-off-by: Qais Yousef <[email protected]>
> > ---
> > include/linux/sched.h | 1 +
> > include/uapi/linux/sched.h | 6 ++++++
> > kernel/sched/core.c | 25 ++++++++++++++++---------
> > 3 files changed, 23 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index afe01e232935..6eb35dfaa893 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -604,6 +604,7 @@ struct uclamp_se {
> > unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
> > unsigned int active : 1;
> > unsigned int user_defined : 1;
> > + unsigned int follow_cgroup : 1;
> > };
> > #endif /* CONFIG_UCLAMP_TASK */
> >
> > diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> > index 3bac0a8ceab2..33ff716a7574 100644
> > --- a/include/uapi/linux/sched.h
> > +++ b/include/uapi/linux/sched.h
> > @@ -132,6 +132,12 @@ struct clone_args {
> > #define SCHED_FLAG_KEEP_PARAMS 0x10
> > #define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
> > #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
> > +/*
> > + * Control whether a task follows/inherits the cgroup uclamp.min/max or not.
> > + * By default this flag is set for all tasks. Any explicit modification to a
> > + * task's UCLAMP_MIN/MAX must set this flag to retain the bahavior.
> > + */
> > +#define SCHED_FLAG_UTIL_FOLLOW_CGROUP 0x80
> >
> > #define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
> > SCHED_FLAG_KEEP_PARAMS)
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 2d95dc3f4644..1c77d6b8bd96 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1083,7 +1083,7 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
> > return uc_req;
> >
> > uc_max = task_group(p)->uclamp[clamp_id];
> > - if (uc_req.value > uc_max.value || !uc_req.user_defined)
> > + if (uc_req.value > uc_max.value || uc_req.follow_cgroup)
> > return uc_max;
> > #endif
> >
> > @@ -1446,6 +1446,7 @@ static void __setscheduler_uclamp(struct task_struct *p,
> > const struct sched_attr *attr)
> > {
> > enum uclamp_id clamp_id;
> > + bool follow_cgroup;
> >
> > /*
> > * On scheduling class change, reset to default clamps for tasks
> > @@ -1472,14 +1473,18 @@ static void __setscheduler_uclamp(struct task_struct *p,
> > if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> > return;
> >
> > + follow_cgroup = attr->sched_flags & SCHED_FLAG_UTIL_FOLLOW_CGROUP;
> > +
> > if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> > - uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
> > - attr->sched_util_min, true);
> > + struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MIN];
> > + uc_se->follow_cgroup = follow_cgroup;
> > + uclamp_se_set(uc_se, attr->sched_util_min, true);
> > }
> >
> > if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
> > - uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
> > - attr->sched_util_max, true);
> > + struct uclamp_se *uc_se = &p->uclamp_req[UCLAMP_MAX];
> > + uc_se->follow_cgroup = follow_cgroup;
> > + uclamp_se_set(uc_se, attr->sched_util_max, true);
> > }
> > }
> >
> > @@ -1498,8 +1503,9 @@ static void uclamp_fork(struct task_struct *p)
> > return;
> >
> > for_each_clamp_id(clamp_id) {
> > - uclamp_se_set(&p->uclamp_req[clamp_id],
> > - uclamp_none(clamp_id), false);
> > + struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
> > + uc_se->follow_cgroup = true;
> > + uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> > }
> > }
> >
> > @@ -1532,8 +1538,9 @@ static void __init init_uclamp(void)
> > init_uclamp_rq(cpu_rq(cpu));
> >
> > for_each_clamp_id(clamp_id) {
> > - uclamp_se_set(&init_task.uclamp_req[clamp_id],
> > - uclamp_none(clamp_id), false);
> > + struct uclamp_se *uc_se = &init_task.uclamp_req[clamp_id];
> > + uc_se->follow_cgroup = true;
> > + uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
> > }
> >
> > /* System defaults allow max clamp values for both indexes */
> > --
> > 2.25.1
On 29/10/2020 14:06, Qais Yousef wrote:
> On 10/29/20 21:02, Yun Hsiang wrote:
>> Hi Qais,
>>
>> On Thu, Oct 29, 2020 at 11:08:18AM +0000, Qais Yousef wrote:
>>> Hi Yun
>>>
>>> Sorry for chipping in late.
>>>
>>> On 10/25/20 15:36, Yun Hsiang wrote:
[...]
>>>> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
>>>> - SCHED_FLAG_UTIL_CLAMP_MAX)
>>>> + SCHED_FLAG_UTIL_CLAMP_MAX | \
>>>> + SCHED_FLAG_UTIL_CLAMP_RESET)
>>>
>>> Is it safe to change this define in a uapi header without a potential
>>> consequence?
AFAICS, there're 3 occurrences, besides the one in
__setscheduler_uclamp(), in which we use SCHED_FLAG_UTIL_CLAMP.
1) call uclamp_validate() in __sched_setscheduler()
2) jump to 'change' label in __sched_setscheduler()
3) check that the uattr->size is SCHED_ATTR_SIZE_VER1 in
sched_copy_attr()
2) and 3) require SCHED_FLAG_UTIL_CLAMP_RESET to be part of
SCHED_FLAG_UTIL_CLAMP but IMHO 1) needs this change:
@@ -1413,8 +1413,14 @@ int sysctl_sched_uclamp_handler(struct ctl_table
*table, int write,
static int uclamp_validate(struct task_struct *p,
const struct sched_attr *attr)
{
- unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
- unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
+ unsigned int lower_bound, upper_bound;
+
+ /* Do not check uclamp attributes values in reset case. */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET)
+ return 0;
+
+ lower_bound = p->uclamp_req[UCLAMP_MIN].value;
+ upper_bound = p->uclamp_req[UCLAMP_MAX].value;
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
lower_bound = attr->sched_util_min;
Otherwise a bogus sa.sched_util_min or sa.sched_util_max with
SCHED_FLAG_UTIL_CLAMP_RESET could return -EINVAL.
>>> FWIW I still have concerns about this approach. We're doing a reset to control
>>> cgroup behavior, I don't see any correlation between the two. Besides the
>>> difference between RESET and setting uclamp_min=0 without RESET is not obvious
>>> nor intuitive for someone who didn't look at the code.
>>>
>>> I propose something like the below which is more explicit about what is being
>>> requested and delivered here. And if we decide to deprecate this behavior,
>>> it'd be much easier to just ignore this flag.
>>>
>>> You must set this flag with your uclamp request to retain the cgroup
>>> inheritance behavior. If the flag is not set, we automatically clear it.
>>
>> I think this behavior may not meet android requirement. Becasue in
>> android there is group like top-app. And we want to boost the
>> group by setting group uclamp_min. If group inheritance is explicit, we
>> need to set this flag for all the tasks in top-app. This might be
>> costly.
>
> You will not have to set it for every task. It's on by default like it works
> now. This behavior doesn't change.
>
> But if you change the uclamp value of a task but still want it to continue to
> inherit the cgroup values if it's attached to one, you must set this flag when
> changing the uclamp value.
I'm not really fond of this idea because:
(1) explicit cgroup(-behavior) related settings through a per-task user
interface.
(2) uclamp reset makes already sense in the !CONFIG_UCLAMP_TASK_GROUP
case. A task can reset its uclamp values here as well, and then
'inheriting' the system defaults again. Already mentioned in
https://lkml.kernel.org/r/[email protected]
[...]
On 10/29/20 16:50, Dietmar Eggemann wrote:
> On 29/10/2020 14:06, Qais Yousef wrote:
> > On 10/29/20 21:02, Yun Hsiang wrote:
> >> Hi Qais,
> >>
> >> On Thu, Oct 29, 2020 at 11:08:18AM +0000, Qais Yousef wrote:
> >>> Hi Yun
> >>>
> >>> Sorry for chipping in late.
> >>>
> >>> On 10/25/20 15:36, Yun Hsiang wrote:
>
> [...]
>
> >>>> #define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
> >>>> - SCHED_FLAG_UTIL_CLAMP_MAX)
> >>>> + SCHED_FLAG_UTIL_CLAMP_MAX | \
> >>>> + SCHED_FLAG_UTIL_CLAMP_RESET)
> >>>
> >>> Is it safe to change this define in a uapi header without a potential
> >>> consequence?
>
> AFAICS, there're 3 occurrences, besides the one in
> __setscheduler_uclamp(), in which we use SCHED_FLAG_UTIL_CLAMP.
>
> 1) call uclamp_validate() in __sched_setscheduler()
>
> 2) jump to 'change' label in __sched_setscheduler()
>
> 3) check that the uattr->size is SCHED_ATTR_SIZE_VER1 in
> sched_copy_attr()
>
> 2) and 3) require SCHED_FLAG_UTIL_CLAMP_RESET to be part of
> SCHED_FLAG_UTIL_CLAMP but IMHO 1) needs this change:
>
> @@ -1413,8 +1413,14 @@ int sysctl_sched_uclamp_handler(struct ctl_table
> *table, int write,
> static int uclamp_validate(struct task_struct *p,
> const struct sched_attr *attr)
> {
> - unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
> - unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
> + unsigned int lower_bound, upper_bound;
> +
> + /* Do not check uclamp attributes values in reset case. */
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_RESET)
> + return 0;
> +
> + lower_bound = p->uclamp_req[UCLAMP_MIN].value;
> + upper_bound = p->uclamp_req[UCLAMP_MAX].value;
>
> if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
> lower_bound = attr->sched_util_min;
>
> Otherwise a bogus sa.sched_util_min or sa.sched_util_max with
> SCHED_FLAG_UTIL_CLAMP_RESET could return -EINVAL.
I haven't checked the details of this implementation but I interpret that you
agree it's better to leave the define in the uapi header intact.
> >>> FWIW I still have concerns about this approach. We're doing a reset to control
> >>> cgroup behavior, I don't see any correlation between the two. Besides the
> >>> difference between RESET and setting uclamp_min=0 without RESET is not obvious
> >>> nor intuitive for someone who didn't look at the code.
> >>>
> >>> I propose something like the below which is more explicit about what is being
> >>> requested and delivered here. And if we decide to deprecate this behavior,
> >>> it'd be much easier to just ignore this flag.
> >>>
> >>> You must set this flag with your uclamp request to retain the cgroup
> >>> inheritance behavior. If the flag is not set, we automatically clear it.
> >>
> >> I think this behavior may not meet android requirement. Becasue in
> >> android there is group like top-app. And we want to boost the
> >> group by setting group uclamp_min. If group inheritance is explicit, we
> >> need to set this flag for all the tasks in top-app. This might be
> >> costly.
> >
> > You will not have to set it for every task. It's on by default like it works
> > now. This behavior doesn't change.
> >
> > But if you change the uclamp value of a task but still want it to continue to
> > inherit the cgroup values if it's attached to one, you must set this flag when
> > changing the uclamp value.
>
> I'm not really fond of this idea because:
>
> (1) explicit cgroup(-behavior) related settings through a per-task user
> interface.
But this is what we're doing whether it's called RESET or something else here.
Ie: cgroup behavior is the purpose of this change.
> (2) uclamp reset makes already sense in the !CONFIG_UCLAMP_TASK_GROUP
> case. A task can reset its uclamp values here as well, and then
> 'inheriting' the system defaults again. Already mentioned in
> https://lkml.kernel.org/r/[email protected]
Yes and no. Generic RESET yes makes sense in general. But not for the intended
use case here which depends on CONFIG_UCLAMP_TASK_GROUP. And not in practice
since only RT tasks have a difference default value that this RESET makes sense
to implement for. But there's no real requirement for that yet. The requirement
is to control cgroup. We're inventing the RESET flag to handle the cgroup case.
So it not making sense for !CONFIG_UCLAMP_TASK_GROUP is the correct outcome.
In my view, I see a contradiction between what we're implementing and what's
required. This subtlety is confusing. It could be just me maybe..
Anyway I won't argue further if you really prefer the RESET way :-)
Thanks
--
Qais Yousef