2021-06-09 18:16:56

by Quentin Perret

[permalink] [raw]
Subject: [PATCH] sched: Make uclamp changes depend on CAP_SYS_NICE

There is currently nothing preventing tasks from changing their per-task
clamp values in anyway that they like. The rationale is probably that
systems administrator are still able to limit those clamps thanks to the
cgroup interface. However, this causes pain in a system where both
per-task and per-cgroup clamps values are expected to be under the
control of core system components (as is the case for Android).

To fix this, let's require CAP_SYS_NICE to increase per-task clamp
values. This allows unprivileged tasks to lower their requests, but not
increase them, which is consistent with the existing behaviour for nice
values.

Signed-off-by: Quentin Perret <[email protected]>
---
kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++++++-------
1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d4aedbbcf96..1e5f9ae441a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1430,6 +1430,11 @@ static int uclamp_validate(struct task_struct *p,
if (util_min != -1 && util_max != -1 && util_min > util_max)
return -EINVAL;

+ return 0;
+}
+
+static void uclamp_enable(void)
+{
/*
* We have valid uclamp attributes; make sure uclamp is enabled.
*
@@ -1438,8 +1443,25 @@ static int uclamp_validate(struct task_struct *p,
* scheduler locks.
*/
static_branch_enable(&sched_uclamp_used);
+}

- return 0;
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ int util_min, util_max;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ util_min = p->uclamp_req[UCLAMP_MIN].value;
+ if (attr->sched_util_min > util_min)
+ return false;
+ }
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ util_max = p->uclamp_req[UCLAMP_MAX].value;
+ if (attr->sched_util_max > util_max)
+ return false;
+ }
+
+ return true;
}

static bool uclamp_reset(const struct sched_attr *attr,
@@ -1580,6 +1602,11 @@ static inline int uclamp_validate(struct task_struct *p,
{
return -EOPNOTSUPP;
}
+static inline void uclamp_enable(void) { }
+static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
+{
+ return true;
+}
static void __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr) { }
static inline void uclamp_fork(struct task_struct *p) { }
@@ -6116,6 +6143,13 @@ static int __sched_setscheduler(struct task_struct *p,
(rt_policy(policy) != (attr->sched_priority != 0)))
return -EINVAL;

+ /* Update task specific "requested" clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
+ retval = uclamp_validate(p, attr);
+ if (retval)
+ return retval;
+ }
+
/*
* Allow unprivileged RT tasks to decrease priority:
*/
@@ -6165,6 +6199,10 @@ static int __sched_setscheduler(struct task_struct *p,
/* Normal users shall not reset the sched_reset_on_fork flag: */
if (p->sched_reset_on_fork && !reset_on_fork)
return -EPERM;
+
+ /* Can't increase util-clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP && !uclamp_reduce(p, attr))
+ return -EPERM;
}

if (user) {
@@ -6176,12 +6214,8 @@ static int __sched_setscheduler(struct task_struct *p,
return retval;
}

- /* Update task specific "requested" clamps */
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
- retval = uclamp_validate(p, attr);
- if (retval)
- return retval;
- }
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
+ uclamp_enable();

if (pi)
cpuset_read_lock();
--
2.32.0.272.g935e593368-goog


2021-06-10 03:38:25

by Xuewen Yan

[permalink] [raw]
Subject: Re: [PATCH] sched: Make uclamp changes depend on CAP_SYS_NICE

On Thu, Jun 10, 2021 at 2:16 AM Quentin Perret <[email protected]> wrote:
>
> There is currently nothing preventing tasks from changing their per-task
> clamp values in anyway that they like. The rationale is probably that
> systems administrator are still able to limit those clamps thanks to the
> cgroup interface. However, this causes pain in a system where both
> per-task and per-cgroup clamps values are expected to be under the
> control of core system components (as is the case for Android).
>
> To fix this, let's require CAP_SYS_NICE to increase per-task clamp
> values. This allows unprivileged tasks to lower their requests, but not
> increase them, which is consistent with the existing behaviour for nice
> values.
>
> Signed-off-by: Quentin Perret <[email protected]>
> ---
> kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 41 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1d4aedbbcf96..1e5f9ae441a0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1430,6 +1430,11 @@ static int uclamp_validate(struct task_struct *p,
> if (util_min != -1 && util_max != -1 && util_min > util_max)
> return -EINVAL;
>
> + return 0;
> +}
> +
> +static void uclamp_enable(void)
> +{
> /*
> * We have valid uclamp attributes; make sure uclamp is enabled.
> *
> @@ -1438,8 +1443,25 @@ static int uclamp_validate(struct task_struct *p,
> * scheduler locks.
> */
> static_branch_enable(&sched_uclamp_used);
> +}
>
> - return 0;
> +static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
> +{
> + int util_min, util_max;
> +
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> + util_min = p->uclamp_req[UCLAMP_MIN].value;
> + if (attr->sched_util_min > util_min)
> + return false;
> + }
> +
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
> + util_max = p->uclamp_req[UCLAMP_MAX].value;
> + if (attr->sched_util_max > util_max)
> + return false;

when the attr->sched_util_max = -1, and the util_max < 1024, here may
should return false, but it would return ture.

Thanks
xuewen
> + }
> +
> + return true;
> }
>
> static bool uclamp_reset(const struct sched_attr *attr,
> @@ -1580,6 +1602,11 @@ static inline int uclamp_validate(struct task_struct *p,
> {
> return -EOPNOTSUPP;
> }
> +static inline void uclamp_enable(void) { }
> +static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
> +{
> + return true;
> +}
> static void __setscheduler_uclamp(struct task_struct *p,
> const struct sched_attr *attr) { }
> static inline void uclamp_fork(struct task_struct *p) { }
> @@ -6116,6 +6143,13 @@ static int __sched_setscheduler(struct task_struct *p,
> (rt_policy(policy) != (attr->sched_priority != 0)))
> return -EINVAL;
>
> + /* Update task specific "requested" clamps */
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
> + retval = uclamp_validate(p, attr);
> + if (retval)
> + return retval;
> + }
> +
> /*
> * Allow unprivileged RT tasks to decrease priority:
> */
> @@ -6165,6 +6199,10 @@ static int __sched_setscheduler(struct task_struct *p,
> /* Normal users shall not reset the sched_reset_on_fork flag: */
> if (p->sched_reset_on_fork && !reset_on_fork)
> return -EPERM;
> +
> + /* Can't increase util-clamps */
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP && !uclamp_reduce(p, attr))
> + return -EPERM;
> }
>
> if (user) {
> @@ -6176,12 +6214,8 @@ static int __sched_setscheduler(struct task_struct *p,
> return retval;
> }
>
> - /* Update task specific "requested" clamps */
> - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
> - retval = uclamp_validate(p, attr);
> - if (retval)
> - return retval;
> - }
> + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
> + uclamp_enable();
>
> if (pi)
> cpuset_read_lock();
> --
> 2.32.0.272.g935e593368-goog
>

2021-06-10 07:48:15

by Quentin Perret

[permalink] [raw]
Subject: Re: [PATCH] sched: Make uclamp changes depend on CAP_SYS_NICE

On Thursday 10 Jun 2021 at 11:33:04 (+0800), Xuewen Yan wrote:
> On Thu, Jun 10, 2021 at 2:16 AM Quentin Perret <[email protected]> wrote:
> > +static bool uclamp_reduce(struct task_struct *p, const struct sched_attr *attr)
> > +{
> > + int util_min, util_max;
> > +
> > + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
> > + util_min = p->uclamp_req[UCLAMP_MIN].value;
> > + if (attr->sched_util_min > util_min)
> > + return false;
> > + }
> > +
> > + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
> > + util_max = p->uclamp_req[UCLAMP_MAX].value;
> > + if (attr->sched_util_max > util_max)
> > + return false;
>
> when the attr->sched_util_max = -1, and the util_max < 1024, here may
> should return false, but it would return ture.

Aha, indeed, I missed that -1 could be used to reset the clamps. I'll
send a v2.

Thanks!
Quentin