LinuxLists.cc - [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched

2019-07-26 19:23:55

Subject: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Luca Abeni <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
include/linux/sched/sysctl.h | 3 +++
kernel/sched/deadline.c | 23 +++++++++++++++++++++--
kernel/sysctl.c | 14 ++++++++++++++
3 files changed, 38 insertions(+), 2 deletions(-)

--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
}

/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting rediculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2608,6 +2616,8 @@ void __getparam_dl(struct task_struct *p
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2631,12 +2641,21 @@ bool __checkparam_dl(const struct sched_
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_rt_handler,
},
{
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
.maxlen = sizeof(int),

2019-07-29 11:46:58

by Daniel Bristot de Oliveira

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On 29/07/2019 10:57, Juri Lelli wrote:
> Hi,
>
> On 26/07/19 16:54, Peter Zijlstra wrote:
>> Cc: Daniel Bristot de Oliveira <[email protected]>
>> Cc: Luca Abeni <[email protected]>
>> Cc: Juri Lelli <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
>> ---
>> include/linux/sched/sysctl.h | 3 +++
>> kernel/sched/deadline.c | 23 +++++++++++++++++++++--
>> kernel/sysctl.c | 14 ++++++++++++++
>> 3 files changed, 38 insertions(+), 2 deletions(-)
>>
>> --- a/include/linux/sched/sysctl.h
>> +++ b/include/linux/sched/sysctl.h
>> @@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
>> extern unsigned int sysctl_sched_rt_period;
>> extern int sysctl_sched_rt_runtime;
>>
>> +extern unsigned int sysctl_sched_dl_period_max;
>> +extern unsigned int sysctl_sched_dl_period_min;
>> +
>> #ifdef CONFIG_UCLAMP_TASK
>> extern unsigned int sysctl_sched_uclamp_util_min;
>> extern unsigned int sysctl_sched_uclamp_util_max;
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
>> }
>>
>> /*
>> + * Default limits for DL period; on the top end we guard against small util
>> + * tasks still getting rediculous long effective runtimes, on the bottom end we
> s/rediculous/ridiculous/
>
>> + * guard against timer DoS.
>> + */
>> +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
>> +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
> These limits look sane to me. I've actually been experimenting with 10us
> period tasks and throttling seemed to behave fine, but I guess 100us is
> a saner default.
>
> So, (with a few lines of changelog :)
>
> Acked-by: Juri Lelli <[email protected]>

Looks sane to me too!

Acked-by: Daniel Bristot de Oliveira <[email protected]>

-- Daniel

2019-07-29 12:12:14

by Juri Lelli

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Hi,

On 26/07/19 16:54, Peter Zijlstra wrote:
>
> Cc: Daniel Bristot de Oliveira <[email protected]>
> Cc: Luca Abeni <[email protected]>
> Cc: Juri Lelli <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> include/linux/sched/sysctl.h | 3 +++
> kernel/sched/deadline.c | 23 +++++++++++++++++++++--
> kernel/sysctl.c | 14 ++++++++++++++
> 3 files changed, 38 insertions(+), 2 deletions(-)
>
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
> extern unsigned int sysctl_sched_rt_period;
> extern int sysctl_sched_rt_runtime;
>
> +extern unsigned int sysctl_sched_dl_period_max;
> +extern unsigned int sysctl_sched_dl_period_min;
> +
> #ifdef CONFIG_UCLAMP_TASK
> extern unsigned int sysctl_sched_uclamp_util_min;
> extern unsigned int sysctl_sched_uclamp_util_max;
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
> }
>
> /*
> + * Default limits for DL period; on the top end we guard against small util
> + * tasks still getting rediculous long effective runtimes, on the bottom end we

s/rediculous/ridiculous/

> + * guard against timer DoS.
> + */
> +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
> +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */

These limits look sane to me. I've actually been experimenting with 10us
period tasks and throttling seemed to behave fine, but I guess 100us is
a saner default.

So, (with a few lines of changelog :)

Acked-by: Juri Lelli <[email protected]>

Thanks for doing this.

Best,

Juri

2019-08-03 16:19:55

by Alessio Balsini

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Hi Peter,

This is indeed an important missing piece.

I think it would be worth having some simple checks, e.g.:
- period_min <= period_max;
- period_min >= (1ULL << DL_SCALE).

To be even more picky, I'm wondering if it would be a good practice to
fail the write operation if there are already dl-tasks in the system
that violate the new constraint. I'm afraid there is no cheap way of
achieving such check, so, I think we can skip this last tricky thing for
now.

Thanks,
Alessio

On Fri, Jul 26, 2019 at 04:54:10PM +0200, Peter Zijlstra wrote:
>
> Cc: Daniel Bristot de Oliveira <[email protected]>
> Cc: Luca Abeni <[email protected]>
> Cc: Juri Lelli <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> include/linux/sched/sysctl.h | 3 +++
> kernel/sched/deadline.c | 23 +++++++++++++++++++++--
> kernel/sysctl.c | 14 ++++++++++++++
> 3 files changed, 38 insertions(+), 2 deletions(-)
>
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
> extern unsigned int sysctl_sched_rt_period;
> extern int sysctl_sched_rt_runtime;
>
> +extern unsigned int sysctl_sched_dl_period_max;
> +extern unsigned int sysctl_sched_dl_period_min;
> +
> #ifdef CONFIG_UCLAMP_TASK
> extern unsigned int sysctl_sched_uclamp_util_min;
> extern unsigned int sysctl_sched_uclamp_util_max;
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
> }
>
> /*
> + * Default limits for DL period; on the top end we guard against small util
> + * tasks still getting rediculous long effective runtimes, on the bottom end we
> + * guard against timer DoS.
> + */
> +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
> +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
> +
> +/*
> * This function validates the new parameters of a -deadline task.
> * We ask for the deadline not being zero, and greater or equal
> * than the runtime, as well as the period of being zero or
> @@ -2608,6 +2616,8 @@ void __getparam_dl(struct task_struct *p
> */
> bool __checkparam_dl(const struct sched_attr *attr)
> {
> + u64 period, max, min;
> +
> /* special dl tasks don't actually use any parameter */
> if (attr->sched_flags & SCHED_FLAG_SUGOV)
> return true;
> @@ -2631,12 +2641,21 @@ bool __checkparam_dl(const struct sched_
> attr->sched_period & (1ULL << 63))
> return false;
>
> + period = attr->sched_period;
> + if (!period)
> + period = attr->sched_deadline;
> +
> /* runtime <= deadline <= period (if period != 0) */
> - if ((attr->sched_period != 0 &&
> - attr->sched_period < attr->sched_deadline) ||
> + if (period < attr->sched_deadline ||
> attr->sched_deadline < attr->sched_runtime)
> return false;
>
> + max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> + min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> +
> + if (period < min || period > max)
> + return false;
> +
> return true;
> }
>
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
> .proc_handler = sched_rt_handler,
> },
> {
> + .procname = "sched_deadline_period_max_us",
> + .data = &sysctl_sched_dl_period_max,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> + .procname = "sched_deadline_period_min_us",
> + .data = &sysctl_sched_dl_period_min,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> .procname = "sched_rr_timeslice_ms",
> .data = &sysctl_sched_rr_timeslice,
> .maxlen = sizeof(int),
>
>

2019-08-05 11:55:12

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Fri, Aug 02, 2019 at 06:21:04PM +0100, Alessio Balsini wrote:
> Hi Peter,
>
> This is indeed an important missing piece.
>
> I think it would be worth having some simple checks, e.g.:
> - period_min <= period_max;
> - period_min >= (1ULL << DL_SCALE).
>
> To be even more picky, I'm wondering if it would be a good practice to
> fail the write operation if there are already dl-tasks in the system
> that violate the new constraint. I'm afraid there is no cheap way of
> achieving such check, so, I think we can skip this last tricky thing for
> now.

Like so?

---
Subject: sched/deadline: Impose global limits on sched_attr::sched_period
From: Peter Zijlstra <[email protected]>
Date: Tue Jul 23 16:01:29 CEST 2019

There are two DoS scenarios with SCHED_DEADLINE related to
sched_attr::sched_period:

- since access-control only looks at utilization and density, a very
large period can allow a very large runtime, which in turn can
incur a very large latency to lower priority tasks.

- for very short periods we can end up spending more time programming
the hardware timer than actually running the task.

Mitigate these by imposing limits on the period.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Luca Abeni <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
---
include/linux/sched/sysctl.h | 7 ++++
kernel/sched/deadline.c | 61 +++++++++++++++++++++++++++++++++++++++++--
kernel/sysctl.c | 14 +++++++++
3 files changed, 80 insertions(+), 2 deletions(-)

--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,13 @@ int sched_proc_update_handler(struct ctl
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
+extern int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2660,6 +2660,52 @@ void __getparam_dl(struct task_struct *p
}

/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting ridiculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int old_min, old_max;
+ static DEFINE_MUTEX(mutex);
+ int ret;
+
+ mutex_lock(&mutex);
+ old_min = sysctl_sched_dl_period_min;
+ old_max = sysctl_sched_dl_period_max;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ u64 min = (u64)sysctl_sched_dl_period_min * NSEC_PER_USEC;
+ u64 max = (u64)sysctl_sched_dl_period_man * NSEC_PER_USEC;
+
+ ret = -EINVAL;
+ if (min < 1ULL << DL_SCALE)
+ goto undo;
+
+ if (max < min)
+ goto undo;
+
+ ret = 0;
+ }
+
+ if (0) {
+undo:
+ sysctl_sched_rt_period = old_period;
+ sysctl_sched_rt_runtime = old_runtime;
+ }
+ mutex_unlock(&mutex);
+
+ return ret;
+}
+
+/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2671,6 +2717,8 @@ void __getparam_dl(struct task_struct *p
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2694,12 +2742,21 @@ bool __checkparam_dl(const struct sched_
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_rt_handler,
},
{
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
.maxlen = sizeof(int),

2019-08-22 16:56:53

by Alessio Balsini

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Mon, Aug 05, 2019 at 01:53:09PM +0200, Peter Zijlstra wrote:
>
> Like so?
>

Yes, that's exactly what I meant!
What about this refactoring?

Thanks,
Alessio

---
From 459d5488acb3fac938b0f35f480a81a6e401ef92 Mon Sep 17 00:00:00 2001
From: Alessio Balsini <[email protected]>
Date: Thu, 22 Aug 2019 12:55:55 +0100
Subject: [PATCH] sched/deadline: Impose global limits on
sched_attr::sched_period

There are two DoS scenarios with SCHED_DEADLINE related to
sched_attr::sched_period:

- since access-control only looks at utilization and density, a very
large period can allow a very large runtime, which in turn can
incur a very large latency to lower priority tasks.

- for very short periods we can end up spending more time programming
the hardware timer than actually running the task.

Mitigate these by imposing limits on the period.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Alessio Balsini <[email protected]>
---
include/linux/sched/sysctl.h | 7 +++++
kernel/sched/deadline.c | 51 ++++++++++++++++++++++++++++++++++--
kernel/sysctl.c | 14 ++++++++++
3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215ee03f7..7c8ef07e52133 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,13 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
+extern int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0b9cbfb2b1d4f..fcdf70d9c0516 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2640,6 +2640,42 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
attr->sched_flags = dl_se->flags;
}

+/*
+ * Default limits for DL period: on the top end we guard against small util
+ * tasks still getting ridiculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int old_max, old_min;
+ static DEFINE_MUTEX(mutex);
+ int ret;
+
+ mutex_lock(&mutex);
+ old_max = sysctl_sched_dl_period_max;
+ old_min = sysctl_sched_dl_period_min;
+
+ ret = proc_douintvec(table, write, buffer, lenp, ppos);
+ if (!ret && write) {
+ u64 max = (u64)sysctl_sched_dl_period_max * NSEC_PER_USEC;
+ u64 min = (u64)sysctl_sched_dl_period_min * NSEC_PER_USEC;
+
+ if (min < 1ULL << DL_SCALE || max < min) {
+ sysctl_sched_dl_period_max = old_max;
+ sysctl_sched_dl_period_min = old_min;
+ ret = -EINVAL;
+ }
+ }
+ mutex_unlock(&mutex);
+
+ return ret;
+}
+
/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
@@ -2652,6 +2688,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2675,12 +2713,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 078950d9605ba..0d07e4707e9d2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -442,6 +442,20 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rt_handler,
},
+ {
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
{
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
--
2.23.0.187.g17f5b7556c-goog

2019-08-22 21:57:24

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Thu, Aug 22, 2019 at 01:29:49PM +0100, Alessio Balsini wrote:
> Yes, that's exactly what I meant!
> What about this refactoring?

Makes sense; and now I spot a race (and sched_rt_handler() from which I
copied this is susceptible too):

> +int sched_dl_period_handler(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp,
> + loff_t *ppos)
> +{
> + unsigned int old_max, old_min;
> + static DEFINE_MUTEX(mutex);
> + int ret;
> +
> + mutex_lock(&mutex);
> + old_max = sysctl_sched_dl_period_max;
> + old_min = sysctl_sched_dl_period_min;
> +
> + ret = proc_douintvec(table, write, buffer, lenp, ppos);

Here the sysctl_* values have been changed, interleave with:

> + if (!ret && write) {
> + u64 max = (u64)sysctl_sched_dl_period_max * NSEC_PER_USEC;
> + u64 min = (u64)sysctl_sched_dl_period_min * NSEC_PER_USEC;
> +
> + if (min < 1ULL << DL_SCALE || max < min) {
> + sysctl_sched_dl_period_max = old_max;
> + sysctl_sched_dl_period_min = old_min;
> + ret = -EINVAL;
> + }
> + }
> + mutex_unlock(&mutex);
> +
> + return ret;
> +}

> @@ -2675,12 +2713,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
> attr->sched_period & (1ULL << 63))
> return false;
>
> + period = attr->sched_period;
> + if (!period)
> + period = attr->sched_deadline;
> +
> /* runtime <= deadline <= period (if period != 0) */
> - if ((attr->sched_period != 0 &&
> - attr->sched_period < attr->sched_deadline) ||
> + if (period < attr->sched_deadline ||
> attr->sched_deadline < attr->sched_runtime)
> return false;
>
> + max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> + min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;

this, and we're using unvalidated numbers.

> + if (period < min || period > max)
> + return false;
> +
> return true;
> }

Something like the completely untested (again, sorry) below ought to
cure that I think; the same needs doing to sched_rt_handler() I'm
thinking.

---
Subject: sched/deadline: Impose global limits on sched_attr::sched_period
From: Alessio Balsini <[email protected]>
Date: Thu, 22 Aug 2019 13:29:49 +0100

There are two DoS scenarios with SCHED_DEADLINE related to
sched_attr::sched_period:

- since access-control only looks at utilization and density, a very
large period can allow a very large runtime, which in turn can
incur a very large latency to lower priority tasks.

- for very short periods we can end up spending more time programming
the hardware timer than actually running the task.

Mitigate these by imposing limits on the period.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Alessio Balsini <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
include/linux/sched/sysctl.h | 7 +++++
kernel/sched/deadline.c | 58 +++++++++++++++++++++++++++++++++++++++++--
kernel/sysctl.c | 14 ++++++++++
3 files changed, 77 insertions(+), 2 deletions(-)

--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,13 @@ int sched_proc_update_handler(struct ctl
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
+extern int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2633,6 +2633,49 @@ void __getparam_dl(struct task_struct *p
}

/*
+ * Default limits for DL period: on the top end we guard against small util
+ * tasks still getting ridiculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int new_max, new_min;
+ struct ctl_table new_table;
+ static DEFINE_MUTEX(mutex);
+ int ret;
+
+ mutex_lock(&mutex);
+ new_max = sysctl_sched_dl_period_max;
+ new_min = sysctl_sched_dl_period_min;
+ new_table = *table;
+ if (new_table.data == &sysctl_sched_dl_period_max)
+ new_table.data = &new_max;
+ else
+ new_table.data = &new_min;
+
+ ret = proc_douintvec(&new_table, write, buffer, lenp, ppos);
+ if (!ret && write) {
+ u64 max = (u64)new_max * NSEC_PER_USEC;
+ u64 min = (u64)new_min * NSEC_PER_USEC;
+
+ if (min > 1ULL << DL_SCALE && max > min) {
+ WRITE_ONCE(sysctl_sched_dl_period_max, new_max);
+ WRITE_ONCE(sysctl_sched_dl_period_min, new_min);
+ } else {
+ ret = -EINVAL;
+ }
+ }
+ mutex_unlock(&mutex);
+
+ return ret;
+}
+
+/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2644,6 +2687,8 @@ void __getparam_dl(struct task_struct *p
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2667,12 +2712,21 @@ bool __checkparam_dl(const struct sched_
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_rt_handler,
},
{
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
.maxlen = sizeof(int),

2019-08-31 14:43:23

by Alessio Balsini

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Right!

Verified that sysctl_sched_dl_period_max and sysctl_sched_dl_period_min values
are now always consistent.

I spent some time in trying to figure out if not having any mutex in
__checkparam_dl() is safe. There can surely happen that "max < min", e.g.:

| | periods
User1 | User2 | checkparam_dl() | sysctl_sched_dl_*
----------|--------------|------------------|-------------------
| | | [x, x]
p_min = 5 | | |
| | | [5, x]
p_max = 5 | | |
| | | [5, 5]
| setattr(p=8) | |
| | p = 8 |
| | [x, 5] |
p_max = 9 | | |
| | | [5, 9]
p_min = 6 | | |
| | | [6, 9]
| | [6, 5] |
----------|--------------|------------------|-------------------

Sharing my thoughts, a "BUG_ON(max < min)" in __checkparam_dl() is then a
guaranteed source of explosions, but the good news is that "if (period < min ||
period > max" in __checkparam_dl() surely fails if "max < min". Also the fact
that, when we are writing the new sysctl_sched_dl_* values, only one is
actually changed at a time, that surely helps to preserve the consistency.

But is that enough?

Well, I couldn't find any counterexample to make __checkparam_dl() pass with
wrong parameters. And the tests I made are happy.

On Thu, Aug 22, 2019 at 06:51:25PM +0200, Peter Zijlstra wrote:
> include/linux/sched/sysctl.h | 7 +++++
> kernel/sched/deadline.c | 58 +++++++++++++++++++++++++++++++++++++++++--
> kernel/sysctl.c | 14 ++++++++++
> 3 files changed, 77 insertions(+), 2 deletions(-)
>
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> +int sched_dl_period_handler(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp,
> + loff_t *ppos)
> +{
> + if (min > 1ULL << DL_SCALE && max > min) {

s/>/>=/

> + WRITE_ONCE(sysctl_sched_dl_period_max, new_max);
> + WRITE_ONCE(sysctl_sched_dl_period_min, new_min);

Besides the inline comment above, this is my ack to your patch.

Otherwise, here follows a slightly more convoluted version of your patch with a
couple of changes to sched_dl_period_handler(), summarized as:
- handle new_table only if writing;
- directly compare the us min and max (saves one multiplication);
- atomic-writes only the sysctl_sched_dl_period_XXX which changed.

M2c.

Thanks!
-Alessio

---
From cb4481233b57e42ff9dd315811f7919168a28162 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <[email protected]>
Date: Thu, 22 Aug 2019 18:51:25 +0200
Subject: [PATCH] sched/deadline: Impose global limits on
sched_attr::sched_period

There are two DoS scenarios with SCHED_DEADLINE related to
sched_attr::sched_period:

- since access-control only looks at utilization and density, a very
large period can allow a very large runtime, which in turn can
incur a very large latency to lower priority tasks.

- for very short periods we can end up spending more time programming
the hardware timer than actually running the task.

Mitigate these by imposing limits on the period.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Alessio Balsini <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
include/linux/sched/sysctl.h | 7 ++++
kernel/sched/deadline.c | 68 ++++++++++++++++++++++++++++++++++--
kernel/sysctl.c | 14 ++++++++
3 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215ee03f7..7c8ef07e52133 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,13 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
+extern int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0b9cbfb2b1d4f..c4a6107e055c7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2640,6 +2640,59 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
attr->sched_flags = dl_se->flags;
}

+/*
+ * Default limits for DL period: on the top end we guard against small util
+ * tasks still getting ridiculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4.2 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+int sched_dl_period_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ int ret;
+
+ mutex_lock(&mutex);
+ if (write) {
+ /*
+ * Use a temporary data structure to store the value read from
+ * userspace. The sysctl_sched_dl_period_{max,min} value will
+ * be updated only if the data is consistent.
+ */
+ struct ctl_table new_table = *table;
+ unsigned int max, min;
+
+ if (new_table.data == &sysctl_sched_dl_period_max) {
+ new_table.data = &max;
+ min = sysctl_sched_dl_period_min;
+ } else {
+ new_table.data = &min;
+ max = sysctl_sched_dl_period_max;
+ }
+
+ ret = proc_douintvec(&new_table, write, buffer, lenp, ppos);
+ if (!ret) {
+ if (min > max ||
+ (u64)min * NSEC_PER_USEC < 1ULL << DL_SCALE) {
+ ret = -EINVAL;
+ } else {
+ unsigned int *src = new_table.data;
+ unsigned int *dst = table->data;
+
+ WRITE_ONCE(*dst, *src);
+ }
+ }
+ } else {
+ ret = proc_douintvec(table, write, buffer, lenp, ppos);
+ }
+ mutex_unlock(&mutex);
+
+ return ret;
+}
+
/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
@@ -2652,6 +2705,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2675,12 +2730,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 078950d9605ba..0d07e4707e9d2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -442,6 +442,20 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rt_handler,
},
+ {
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_dl_period_handler,
+ },
{
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
--
2.23.0.187.g17f5b7556c-goog

2019-09-02 09:45:25

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Sat, Aug 31, 2019 at 03:41:17PM +0100, Alessio Balsini wrote:
> Right!
>
> Verified that sysctl_sched_dl_period_max and sysctl_sched_dl_period_min values
> are now always consistent.
>
> I spent some time in trying to figure out if not having any mutex in
> __checkparam_dl() is safe. There can surely happen that "max < min", e.g.:
>
> | | periods
> User1 | User2 | checkparam_dl() | sysctl_sched_dl_*
> ----------|--------------|------------------|-------------------
> | | | [x, x]
> p_min = 5 | | |
> | | | [5, x]
> p_max = 5 | | |
> | | | [5, 5]
> | setattr(p=8) | |
> | | p = 8 |
> | | [x, 5] |
> p_max = 9 | | |
> | | | [5, 9]
> p_min = 6 | | |
> | | | [6, 9]
> | | [6, 5] |
> ----------|--------------|------------------|-------------------
>
> Sharing my thoughts, a "BUG_ON(max < min)" in __checkparam_dl() is then a
> guaranteed source of explosions, but the good news is that "if (period < min ||
> period > max" in __checkparam_dl() surely fails if "max < min". Also the fact
> that, when we are writing the new sysctl_sched_dl_* values, only one is
> actually changed at a time, that surely helps to preserve the consistency.
>
> But is that enough?

Strictly speaking, no, I suppose it is not. We can have two changes in
between the two READ_ONCE()s and then we'd be able to observe a
violation.

The easy way to fix that is do something like:

+ synchronize_rcu();
mutex_unlock(&mutex);

in sched_dl_period_handler(). And do:

+ preempt_disable();
max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+ preempt_enable();

in __checkparam_dl().

That would prohibit we see two changes, and seeing only the single
change is safe.

2019-09-02 12:39:30

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Mon, Sep 02, 2019 at 11:16:23AM +0200, Peter Zijlstra wrote:
> On Sat, Aug 31, 2019 at 03:41:17PM +0100, Alessio Balsini wrote:
> > Right!
> >
> > Verified that sysctl_sched_dl_period_max and sysctl_sched_dl_period_min values
> > are now always consistent.
> >
> > I spent some time in trying to figure out if not having any mutex in
> > __checkparam_dl() is safe. There can surely happen that "max < min", e.g.:

> > Sharing my thoughts, a "BUG_ON(max < min)" in __checkparam_dl() is then a
> > guaranteed source of explosions, but the good news is that "if (period < min ||
> > period > max" in __checkparam_dl() surely fails if "max < min". Also the fact
> > that, when we are writing the new sysctl_sched_dl_* values, only one is
> > actually changed at a time, that surely helps to preserve the consistency.
> >
> > But is that enough?
>
> Strictly speaking, no, I suppose it is not. We can have two changes in
> between the two READ_ONCE()s and then we'd be able to observe a
> violation.
>
> The easy way to fix that is do something like:
>
> + synchronize_rcu();
> mutex_unlock(&mutex);
>
> in sched_dl_period_handler(). And do:
>
> + preempt_disable();
> max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> + preempt_enable();
>
> in __checkparam_dl().
>
> That would prohibit we see two changes, and seeing only the single
> change is safe.

I pushed out a new version; and added patch to sched_rt_handler() on
top.

Please have a look at:

git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git sched/wip-deadline

I'll move these two patches to sched/core if everything looks good.

2019-09-04 10:18:44

by Steven Rostedt

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Mon, 2 Sep 2019 11:16:23 +0200
Peter Zijlstra <[email protected]> wrote:

> in sched_dl_period_handler(). And do:
>
> + preempt_disable();
> max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> + preempt_enable();

Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
compiler barriers which would remove the need for the READ_ONCE()s here?

-- Steve

2019-09-04 11:32:10

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Wed, Sep 04, 2019 at 06:16:16AM -0400, Steven Rostedt wrote:
> On Mon, 2 Sep 2019 11:16:23 +0200
> Peter Zijlstra <[email protected]> wrote:
>
> > in sched_dl_period_handler(). And do:
> >
> > + preempt_disable();
> > max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> > min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> > + preempt_enable();
>
> Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
> compiler barriers which would remove the need for the READ_ONCE()s here?

They do add compiler barriers; but they do not avoid the compiler
tearing stuff up.

So while Linus has declared that compilers should not be tearing shit
up, I'm hesitant to actually trust compilers much these days.

2019-09-04 13:26:30

by Joel Fernandes

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Wed, Sep 04, 2019 at 01:30:38PM +0200, Peter Zijlstra wrote:
> On Wed, Sep 04, 2019 at 06:16:16AM -0400, Steven Rostedt wrote:
> > On Mon, 2 Sep 2019 11:16:23 +0200
> > Peter Zijlstra <[email protected]> wrote:
> >
> > > in sched_dl_period_handler(). And do:
> > >
> > > + preempt_disable();
> > > max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> > > min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> > > + preempt_enable();
> >
> > Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
> > compiler barriers which would remove the need for the READ_ONCE()s here?
>
> They do add compiler barriers; but they do not avoid the compiler
> tearing stuff up.

Neither does WRITE_ONCE() on some possibly buggy but currently circulating
compilers :(

As Will said in:
https://lore.kernel.org/lkml/20190821103200.kpufwtviqhpbuv2n@willie-the-truck/

void bar(u64 *x)
{
*(volatile u64 *)x = 0xabcdef10abcdef10;
}

gives:

bar:
mov w1, 61200
movk w1, 0xabcd, lsl 16
str w1, [x0]
str w1, [x0, 4]
ret

Speaking of which, Will, is there a plan to have compiler folks address this
tearing issue and are bugs filed somewhere? I believe aarch64 gcc is buggy,
and clang is better but is still buggy?

thanks,

- Joel

2019-09-04 14:12:38

by Will Deacon

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Hi Joel,

On Wed, Sep 04, 2019 at 09:24:18AM -0400, Joel Fernandes wrote:
> On Wed, Sep 04, 2019 at 01:30:38PM +0200, Peter Zijlstra wrote:
> > On Wed, Sep 04, 2019 at 06:16:16AM -0400, Steven Rostedt wrote:
> > > On Mon, 2 Sep 2019 11:16:23 +0200
> > > Peter Zijlstra <[email protected]> wrote:
> > >
> > > > in sched_dl_period_handler(). And do:
> > > >
> > > > + preempt_disable();
> > > > max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> > > > min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> > > > + preempt_enable();
> > >
> > > Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
> > > compiler barriers which would remove the need for the READ_ONCE()s here?
> >
> > They do add compiler barriers; but they do not avoid the compiler
> > tearing stuff up.
>
> Neither does WRITE_ONCE() on some possibly buggy but currently circulating
> compilers :(

Hmm. The example above is using READ_ONCE, which is a different kettle of
frogs.

> As Will said in:
> https://lore.kernel.org/lkml/20190821103200.kpufwtviqhpbuv2n@willie-the-truck/
>
> void bar(u64 *x)
> {
> *(volatile u64 *)x = 0xabcdef10abcdef10;
> }
>
> gives:
>
> bar:
> mov w1, 61200
> movk w1, 0xabcd, lsl 16
> str w1, [x0]
> str w1, [x0, 4]
> ret
>
> Speaking of which, Will, is there a plan to have compiler folks address this
> tearing issue and are bugs filed somewhere? I believe aarch64 gcc is buggy,
> and clang is better but is still buggy?

Well, it depends on your point of view. Personally, I think tearing a
volatile access (e.g. WRITE_ONCE) is buggy and it seems as though the GCC
developers agree:

https://gcc.gnu.org/ml/gcc-patches/2019-08/msg01500.html

so it's likely this will be fixed for AArch64 GCC. I couldn't persuade
clang to break the volatile case, so think we're good there too.

For the non-volatile case, I don't actually consider it to be a bug,
although I sympathise with the desire to avoid a retrospective tree-wide
sweep adding random WRITE_ONCE invocations to stores that look like they
might be concurrent. In other words, I think I'd suggest:

* The use of WRITE_ONCE in new code (probably with a comment justifying it)
* The introduction of WRITE_ONCE to existing code where it can be shown to
be fixing a real bug (e.g. by demonstrating that a compiler actually
gets it wrong)

For the /vast/ majority of cases, the compiler will do the right thing
even without WRITE_ONCE, simply because that's going to be the most
performant choice as well.

Will

2019-09-04 14:36:52

by Joel Fernandes

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Wed, Sep 04, 2019 at 03:11:10PM +0100, Will Deacon wrote:
> Hi Joel,
>
> On Wed, Sep 04, 2019 at 09:24:18AM -0400, Joel Fernandes wrote:
> > On Wed, Sep 04, 2019 at 01:30:38PM +0200, Peter Zijlstra wrote:
> > > On Wed, Sep 04, 2019 at 06:16:16AM -0400, Steven Rostedt wrote:
> > > > On Mon, 2 Sep 2019 11:16:23 +0200
> > > > Peter Zijlstra <[email protected]> wrote:
> > > >
> > > > > in sched_dl_period_handler(). And do:
> > > > >
> > > > > + preempt_disable();
> > > > > max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> > > > > min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> > > > > + preempt_enable();
> > > >
> > > > Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
> > > > compiler barriers which would remove the need for the READ_ONCE()s here?
> > >
> > > They do add compiler barriers; but they do not avoid the compiler
> > > tearing stuff up.
> >
> > Neither does WRITE_ONCE() on some possibly buggy but currently circulating
> > compilers :(
>
> Hmm. The example above is using READ_ONCE, which is a different kettle of
> frogs.

True. But, I equally worry about all *-tearing frog kettles ;-)

> > As Will said in:
> > https://lore.kernel.org/lkml/20190821103200.kpufwtviqhpbuv2n@willie-the-truck/
> >
> > void bar(u64 *x)
> > {
> > *(volatile u64 *)x = 0xabcdef10abcdef10;
> > }
> >
> > gives:
> >
> > bar:
> > mov w1, 61200
> > movk w1, 0xabcd, lsl 16
> > str w1, [x0]
> > str w1, [x0, 4]
> > ret
> >
> > Speaking of which, Will, is there a plan to have compiler folks address this
> > tearing issue and are bugs filed somewhere? I believe aarch64 gcc is buggy,
> > and clang is better but is still buggy?
>
> Well, it depends on your point of view. Personally, I think tearing a
> volatile access (e.g. WRITE_ONCE) is buggy and it seems as though the GCC
> developers agree:
>
> https://gcc.gnu.org/ml/gcc-patches/2019-08/msg01500.html
>
> so it's likely this will be fixed for AArch64 GCC. I couldn't persuade
> clang to break the volatile case, so think we're good there too.

Glad to know that GCC folks are looking into the issue.

Sorry if this is getting a bit off-topic. Also does the aarch64 clang doing
the "memset folding" issue, also need to be looked into?
You had mentioned it in the same thread:
https://lore.kernel.org/lkml/20190821103200.kpufwtviqhpbuv2n@willie-the-truck/
Or, does WRITE_ONCE() resolve such memset store-merging?

> For the non-volatile case, I don't actually consider it to be a bug,
> although I sympathise with the desire to avoid a retrospective tree-wide
> sweep adding random WRITE_ONCE invocations to stores that look like they
> might be concurrent. In other words, I think I'd suggest:
>
> * The use of WRITE_ONCE in new code (probably with a comment justifying it)
> * The introduction of WRITE_ONCE to existing code where it can be shown to
> be fixing a real bug (e.g. by demonstrating that a compiler actually
> gets it wrong)
>
> For the /vast/ majority of cases, the compiler will do the right thing
> even without WRITE_ONCE, simply because that's going to be the most
> performant choice as well.

Thanks for the thoughts. They seem to be reasonable to me.

thanks,

- Joel

2019-09-04 15:54:08

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On Wed, Sep 04, 2019 at 09:24:18AM -0400, Joel Fernandes wrote:
> On Wed, Sep 04, 2019 at 01:30:38PM +0200, Peter Zijlstra wrote:
> > On Wed, Sep 04, 2019 at 06:16:16AM -0400, Steven Rostedt wrote:
> > > On Mon, 2 Sep 2019 11:16:23 +0200
> > > Peter Zijlstra <[email protected]> wrote:
> > >
> > > > in sched_dl_period_handler(). And do:
> > > >
> > > > + preempt_disable();
> > > > max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> > > > min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> > > > + preempt_enable();
> > >
> > > Hmm, I'm curious. Doesn't the preempt_disable/enable() also add
> > > compiler barriers which would remove the need for the READ_ONCE()s here?
> >
> > They do add compiler barriers; but they do not avoid the compiler
> > tearing stuff up.
>
> Neither does WRITE_ONCE() on some possibly buggy but currently circulating
> compilers :(

Yes, I'm aware :/

2019-10-24 11:46:22

by Alessio Balsini

[permalink] [raw]

Subject: [PATCH 4.4 4.9 4.14] loop: Add LOOP_SET_DIRECT_IO to compat ioctl

[ Upstream commit fdbe4eeeb1aac219b14f10c0ed31ae5d1123e9b8 ]

Enabling Direct I/O with loop devices helps reducing memory usage by
avoiding double caching. 32 bit applications running on 64 bits systems
are currently not able to request direct I/O because is missing from the
lo_compat_ioctl.

This patch fixes the compatibility issue mentioned above by exporting
LOOP_SET_DIRECT_IO as additional lo_compat_ioctl() entry.
The input argument for this ioctl is a single long converted to a 1-bit
boolean, so compatibility is preserved.

Cc: Jens Axboe <[email protected]>
Signed-off-by: Alessio Balsini <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
---
drivers/block/loop.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index da3902ac16c86..8aadd4d0c3a88 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1557,6 +1557,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
arg = (unsigned long) compat_ptr(arg);
case LOOP_SET_FD:
case LOOP_CHANGE_FD:
+ case LOOP_SET_DIRECT_IO:
err = lo_ioctl(bdev, mode, cmd, arg);
break;
default:
--
2.23.0.866.gb869b98d4c-goog

2019-10-24 11:46:57

by Alessio Balsini

[permalink] [raw]

Subject: Re: [PATCH 4.4 4.9 4.14] loop: Add LOOP_SET_DIRECT_IO to compat ioctl

Ops, please forgive the wrong in-reply-to messge id :)

Cheers,
Alessio

On Wed, Oct 23, 2019 at 06:17:36PM +0100, Alessio Balsini wrote:
> [ Upstream commit fdbe4eeeb1aac219b14f10c0ed31ae5d1123e9b8 ]
>
> Enabling Direct I/O with loop devices helps reducing memory usage by
> avoiding double caching. 32 bit applications running on 64 bits systems
> are currently not able to request direct I/O because is missing from the
> lo_compat_ioctl.
>
> This patch fixes the compatibility issue mentioned above by exporting
> LOOP_SET_DIRECT_IO as additional lo_compat_ioctl() entry.
> The input argument for this ioctl is a single long converted to a 1-bit
> boolean, so compatibility is preserved.
>
> Cc: Jens Axboe <[email protected]>
> Signed-off-by: Alessio Balsini <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
> Signed-off-by: Sasha Levin <[email protected]>
> ---
> drivers/block/loop.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index da3902ac16c86..8aadd4d0c3a88 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1557,6 +1557,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
> arg = (unsigned long) compat_ptr(arg);
> case LOOP_SET_FD:
> case LOOP_CHANGE_FD:
> + case LOOP_SET_DIRECT_IO:
> err = lo_ioctl(bdev, mode, cmd, arg);
> break;
> default:
> --
> 2.23.0.866.gb869b98d4c-goog
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
>

2019-10-25 19:18:16

by Sasha Levin

[permalink] [raw]

Subject: Re: [PATCH 4.4 4.9 4.14] loop: Add LOOP_SET_DIRECT_IO to compat ioctl

On Wed, Oct 23, 2019 at 06:17:36PM +0100, Alessio Balsini wrote:
>[ Upstream commit fdbe4eeeb1aac219b14f10c0ed31ae5d1123e9b8 ]
>
>Enabling Direct I/O with loop devices helps reducing memory usage by
>avoiding double caching. 32 bit applications running on 64 bits systems
>are currently not able to request direct I/O because is missing from the
>lo_compat_ioctl.
>
>This patch fixes the compatibility issue mentioned above by exporting
>LOOP_SET_DIRECT_IO as additional lo_compat_ioctl() entry.
>The input argument for this ioctl is a single long converted to a 1-bit
>boolean, so compatibility is preserved.
>
>Cc: Jens Axboe <[email protected]>
>Signed-off-by: Alessio Balsini <[email protected]>
>Signed-off-by: Jens Axboe <[email protected]>
>Signed-off-by: Sasha Levin <[email protected]>

Queued up, thanks!

--
Thanks,
Sasha

2020-05-20 18:40:46

by Juri Lelli

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

Hi Peter,

On 26/07/19 16:54, Peter Zijlstra wrote:
>
> Cc: Daniel Bristot de Oliveira <[email protected]>
> Cc: Luca Abeni <[email protected]>
> Cc: Juri Lelli <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> include/linux/sched/sysctl.h | 3 +++
> kernel/sched/deadline.c | 23 +++++++++++++++++++++--
> kernel/sysctl.c | 14 ++++++++++++++
> 3 files changed, 38 insertions(+), 2 deletions(-)
>
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
> extern unsigned int sysctl_sched_rt_period;
> extern int sysctl_sched_rt_runtime;
>
> +extern unsigned int sysctl_sched_dl_period_max;
> +extern unsigned int sysctl_sched_dl_period_min;
> +
> #ifdef CONFIG_UCLAMP_TASK
> extern unsigned int sysctl_sched_uclamp_util_min;
> extern unsigned int sysctl_sched_uclamp_util_max;
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
> }
>
> /*
> + * Default limits for DL period; on the top end we guard against small util
> + * tasks still getting rediculous long effective runtimes, on the bottom end we
> + * guard against timer DoS.
> + */
> +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
> +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
> +
> +/*
> * This function validates the new parameters of a -deadline task.
> * We ask for the deadline not being zero, and greater or equal
> * than the runtime, as well as the period of being zero or
> @@ -2608,6 +2616,8 @@ void __getparam_dl(struct task_struct *p
> */
> bool __checkparam_dl(const struct sched_attr *attr)
> {
> + u64 period, max, min;
> +
> /* special dl tasks don't actually use any parameter */
> if (attr->sched_flags & SCHED_FLAG_SUGOV)
> return true;
> @@ -2631,12 +2641,21 @@ bool __checkparam_dl(const struct sched_
> attr->sched_period & (1ULL << 63))
> return false;
>
> + period = attr->sched_period;
> + if (!period)
> + period = attr->sched_deadline;
> +
> /* runtime <= deadline <= period (if period != 0) */
> - if ((attr->sched_period != 0 &&
> - attr->sched_period < attr->sched_deadline) ||
> + if (period < attr->sched_deadline ||
> attr->sched_deadline < attr->sched_runtime)
> return false;
>
> + max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
> + min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
> +
> + if (period < min || period > max)
> + return false;
> +
> return true;
> }
>
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
> .proc_handler = sched_rt_handler,
> },
> {
> + .procname = "sched_deadline_period_max_us",
> + .data = &sysctl_sched_dl_period_max,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> + .procname = "sched_deadline_period_min_us",
> + .data = &sysctl_sched_dl_period_min,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> .procname = "sched_rr_timeslice_ms",
> .data = &sysctl_sched_rr_timeslice,
> .maxlen = sizeof(int),

I think this never made it upstream. And I believe both me and Daniel
were OK with it. Do you recall if any additional change was needed?

Thanks,

Juri

2020-05-21 13:48:04

by Daniel Bristot de Oliveira

[permalink] [raw]

Subject: Re: [RFC][PATCH 01/13] sched/deadline: Impose global limits on sched_attr::sched_period

On 5/20/20 8:38 PM, Juri Lelli wrote:
> Hi Peter,
>
> On 26/07/19 16:54, Peter Zijlstra wrote:
>> Cc: Daniel Bristot de Oliveira <[email protected]>
>> Cc: Luca Abeni <[email protected]>
>> Cc: Juri Lelli <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
>> ---
>> include/linux/sched/sysctl.h | 3 +++
>> kernel/sched/deadline.c | 23 +++++++++++++++++++++--
>> kernel/sysctl.c | 14 ++++++++++++++
>> 3 files changed, 38 insertions(+), 2 deletions(-)
>>
>> --- a/include/linux/sched/sysctl.h
>> +++ b/include/linux/sched/sysctl.h
>> @@ -56,6 +56,9 @@ int sched_proc_update_handler(struct ctl
>> extern unsigned int sysctl_sched_rt_period;
>> extern int sysctl_sched_rt_runtime;
>>
>> +extern unsigned int sysctl_sched_dl_period_max;
>> +extern unsigned int sysctl_sched_dl_period_min;
>> +
>> #ifdef CONFIG_UCLAMP_TASK
>> extern unsigned int sysctl_sched_uclamp_util_min;
>> extern unsigned int sysctl_sched_uclamp_util_max;
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -2597,6 +2597,14 @@ void __getparam_dl(struct task_struct *p
>> }
>>
>> /*
>> + * Default limits for DL period; on the top end we guard against small util
>> + * tasks still getting rediculous long effective runtimes, on the bottom end we
>> + * guard against timer DoS.
>> + */
>> +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
>> +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
>> +
>> +/*
>> * This function validates the new parameters of a -deadline task.
>> * We ask for the deadline not being zero, and greater or equal
>> * than the runtime, as well as the period of being zero or
>> @@ -2608,6 +2616,8 @@ void __getparam_dl(struct task_struct *p
>> */
>> bool __checkparam_dl(const struct sched_attr *attr)
>> {
>> + u64 period, max, min;
>> +
>> /* special dl tasks don't actually use any parameter */
>> if (attr->sched_flags & SCHED_FLAG_SUGOV)
>> return true;
>> @@ -2631,12 +2641,21 @@ bool __checkparam_dl(const struct sched_
>> attr->sched_period & (1ULL << 63))
>> return false;
>>
>> + period = attr->sched_period;
>> + if (!period)
>> + period = attr->sched_deadline;
>> +
>> /* runtime <= deadline <= period (if period != 0) */
>> - if ((attr->sched_period != 0 &&
>> - attr->sched_period < attr->sched_deadline) ||
>> + if (period < attr->sched_deadline ||
>> attr->sched_deadline < attr->sched_runtime)
>> return false;
>>
>> + max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
>> + min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
>> +
>> + if (period < min || period > max)
>> + return false;
>> +
>> return true;
>> }
>>
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -443,6 +443,20 @@ static struct ctl_table kern_table[] = {
>> .proc_handler = sched_rt_handler,
>> },
>> {
>> + .procname = "sched_deadline_period_max_us",
>> + .data = &sysctl_sched_dl_period_max,
>> + .maxlen = sizeof(unsigned int),
>> + .mode = 0644,
>> + .proc_handler = proc_dointvec,
>> + },
>> + {
>> + .procname = "sched_deadline_period_min_us",
>> + .data = &sysctl_sched_dl_period_min,
>> + .maxlen = sizeof(unsigned int),
>> + .mode = 0644,
>> + .proc_handler = proc_dointvec,
>> + },
>> + {
>> .procname = "sched_rr_timeslice_ms",
>> .data = &sysctl_sched_rr_timeslice,
>> .maxlen = sizeof(int),
> I think this never made it upstream. And I believe both me and Daniel
> were OK with it. Do you recall if any additional change was needed?

Just to confirm, yes I am OK with it!

-- Daniel

2020-06-16 12:24:57

by tip-bot2 for Jacob Pan

[permalink] [raw]

Subject: [tip: sched/core] sched/deadline: Impose global limits on sched_attr::sched_period

The following commit has been merged into the sched/core branch of tip:

Commit-ID: b4098bfc5efb1fd7ecf40165132a1283aeea3500
Gitweb: https://git.kernel.org/tip/b4098bfc5efb1fd7ecf40165132a1283aeea3500
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 26 Jul 2019 16:54:10 +02:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Mon, 15 Jun 2020 14:10:04 +02:00

sched/deadline: Impose global limits on sched_attr::sched_period

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
include/linux/sched/sysctl.h | 3 +++
kernel/sched/deadline.c | 23 +++++++++++++++++++++--
kernel/sysctl.c | 14 ++++++++++++++
3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 660ac49..24be30a 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -61,6 +61,9 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
#ifdef CONFIG_UCLAMP_TASK
extern unsigned int sysctl_sched_uclamp_util_min;
extern unsigned int sysctl_sched_uclamp_util_max;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 504d2f5..f31964a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2635,6 +2635,14 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
}

/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting rediculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
+
+/*
* This function validates the new parameters of a -deadline task.
* We ask for the deadline not being zero, and greater or equal
* than the runtime, as well as the period of being zero or
@@ -2646,6 +2654,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
*/
bool __checkparam_dl(const struct sched_attr *attr)
{
+ u64 period, max, min;
+
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
@@ -2669,12 +2679,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
attr->sched_period & (1ULL << 63))
return false;

+ period = attr->sched_period;
+ if (!period)
+ period = attr->sched_deadline;
+
/* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
+ if (period < attr->sched_deadline ||
attr->sched_deadline < attr->sched_runtime)
return false;

+ max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+ min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+ if (period < min || period > max)
+ return false;
+
return true;
}

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index db1ce7a..4aea67d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1780,6 +1780,20 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_rt_handler,
},
{
+ .procname = "sched_deadline_period_max_us",
+ .data = &sysctl_sched_dl_period_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_deadline_period_min_us",
+ .data = &sysctl_sched_dl_period_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "sched_rr_timeslice_ms",
.data = &sysctl_sched_rr_timeslice,
.maxlen = sizeof(int),