2024-01-30 08:18:08

by Xuewen Yan

[permalink] [raw]
Subject: [PATCH] sched/eevdf: Prevent vlag from exceeding the limit value

There are some scenarios here that will cause vlag to
exceed eevdf's limit.

1. In set_next_entity, it will stash a copy of deadline
at the point of pick in vlag, and if the task fork child,
the child's vlag would inherit parent's vlag, as a result,
the child's vlag is equal to the deadline. And then,
when place_entity, because the se's vlag is wrong, it would
cause vruntime and deadline update error.

2. When reweight_entity, the vlag would be recalculated,
because the new weights are uncertain, and it may cause
the vlag to exceed eevdf's limit.

In order to ensure that vlag does not exceed the limit,
separate the calculation of limit from the update_entity_lag
and create a new func limit_entity_lag. When the vlag is
updated, use this func to prevent vlag from exceeding the limit.

And add check whether the se is forked in place_entity, and calc
the curr's vlag to update the se's vlag.

Signed-off-by: Xuewen Yan <[email protected]>
---
kernel/sched/fair.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..3fc99b4b8b41 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -696,15 +696,22 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
*
* XXX could add max_slice to the augmented data to track this.
*/
+static s64 limit_entity_lag(struct sched_entity *se, s64 lag)
+{
+ s64 limit;
+
+ limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+ return clamp(lag, -limit, limit);
+}
+
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- s64 lag, limit;
+ s64 lag;

SCHED_WARN_ON(!se->on_rq);
lag = avg_vruntime(cfs_rq) - se->vruntime;

- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
- se->vlag = clamp(lag, -limit, limit);
+ se->vlag = limit_entity_lag(se, lag);
}

/*
@@ -3757,6 +3764,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (avruntime != se->vruntime) {
vlag = (s64)(avruntime - se->vruntime);
vlag = div_s64(vlag * old_weight, weight);
+ vlag = limit_entity_lag(se, vlag);
se->vruntime = avruntime - vlag;
}

@@ -3804,6 +3812,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,

update_load_set(&se->load, weight);

+ if (!se->on_rq)
+ se-vlag = limit_entity_lag(se, se->vlag);
+
#ifdef CONFIG_SMP
do {
u32 divider = get_pelt_divider(&se->avg);
@@ -5177,6 +5188,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

lag = se->vlag;

+ /* When fork, the child would inherit parent's vlag,
+ * but parent's vlag may be euqual to it's deadline.
+ */
+ if (flag & ENQUEUE_INITIAL) {
+ if (curr)
+ lag = vruntime - curr->vruntime;
+
+ lag = limit_entity_lag(se, lag);
+ se->vlag = lag;
+ }
/*
* If we want to place a task and preserve lag, we have to
* consider the effect of the new entity on the weighted
@@ -5237,6 +5258,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (WARN_ON_ONCE(!load))
load = 1;
lag = div_s64(lag, load);
+ lag = limit_entity_lag(se, lag);
}

se->vruntime = vruntime - lag;
--
2.25.1



2024-02-05 06:36:57

by Xuewen Yan

[permalink] [raw]
Subject: Re: [PATCH] sched/eevdf: Prevent vlag from exceeding the limit value

Hi all,

On Tue, Jan 30, 2024 at 4:07 PM Xuewen Yan <[email protected]> wrote:
>
> There are some scenarios here that will cause vlag to
> exceed eevdf's limit.
>
> 1. In set_next_entity, it will stash a copy of deadline
> at the point of pick in vlag, and if the task fork child,
> the child's vlag would inherit parent's vlag, as a result,
> the child's vlag is equal to the deadline. And then,
> when place_entity, because the se's vlag is wrong, it would
> cause vruntime and deadline update error.

Could anyone make some comments?

In my opinion, even if it may be ignored when reweighting, it is
better to solve the problem when forking.
Maybe we could change some settings in set_next_entity() where vlag is
equal to deadline?


Thanks!
--
BR
xuewen

>
> 2. When reweight_entity, the vlag would be recalculated,
> because the new weights are uncertain, and it may cause
> the vlag to exceed eevdf's limit.
>
> In order to ensure that vlag does not exceed the limit,
> separate the calculation of limit from the update_entity_lag
> and create a new func limit_entity_lag. When the vlag is
> updated, use this func to prevent vlag from exceeding the limit.
>
> And add check whether the se is forked in place_entity, and calc
> the curr's vlag to update the se's vlag.
>
> Signed-off-by: Xuewen Yan <[email protected]>
> ---
> kernel/sched/fair.c | 28 +++++++++++++++++++++++++---
> 1 file changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 533547e3c90a..3fc99b4b8b41 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -696,15 +696,22 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
> *
> * XXX could add max_slice to the augmented data to track this.
> */
> +static s64 limit_entity_lag(struct sched_entity *se, s64 lag)
> +{
> + s64 limit;
> +
> + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> + return clamp(lag, -limit, limit);
> +}
> +
> static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
> - s64 lag, limit;
> + s64 lag;
>
> SCHED_WARN_ON(!se->on_rq);
> lag = avg_vruntime(cfs_rq) - se->vruntime;
>
> - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> - se->vlag = clamp(lag, -limit, limit);
> + se->vlag = limit_entity_lag(se, lag);
> }
>
> /*
> @@ -3757,6 +3764,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
> if (avruntime != se->vruntime) {
> vlag = (s64)(avruntime - se->vruntime);
> vlag = div_s64(vlag * old_weight, weight);
> + vlag = limit_entity_lag(se, vlag);
> se->vruntime = avruntime - vlag;
> }
>
> @@ -3804,6 +3812,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
>
> update_load_set(&se->load, weight);
>
> + if (!se->on_rq)
> + se-vlag = limit_entity_lag(se, se->vlag);
> +
> #ifdef CONFIG_SMP
> do {
> u32 divider = get_pelt_divider(&se->avg);
> @@ -5177,6 +5188,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>
> lag = se->vlag;
>
> + /* When fork, the child would inherit parent's vlag,
> + * but parent's vlag may be euqual to it's deadline.
> + */
> + if (flag & ENQUEUE_INITIAL) {
> + if (curr)
> + lag = vruntime - curr->vruntime;
> +
> + lag = limit_entity_lag(se, lag);
> + se->vlag = lag;
> + }
> /*
> * If we want to place a task and preserve lag, we have to
> * consider the effect of the new entity on the weighted
> @@ -5237,6 +5258,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> if (WARN_ON_ONCE(!load))
> load = 1;
> lag = div_s64(lag, load);
> + lag = limit_entity_lag(se, lag);
> }
>
> se->vruntime = vruntime - lag;
> --
> 2.25.1
>

2024-04-08 12:06:27

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] sched/eevdf: Prevent vlag from exceeding the limit value

On Tue, Jan 30, 2024 at 04:06:43PM +0800, Xuewen Yan wrote:
> There are some scenarios here that will cause vlag to
> exceed eevdf's limit.
>
> 1. In set_next_entity, it will stash a copy of deadline
> at the point of pick in vlag, and if the task fork child,
> the child's vlag would inherit parent's vlag, as a result,
> the child's vlag is equal to the deadline. And then,
> when place_entity, because the se's vlag is wrong, it would
> cause vruntime and deadline update error.

But __sched_fork() clears it? Or am I missing something subtle?

> 2. When reweight_entity, the vlag would be recalculated,
> because the new weights are uncertain, and it may cause
> the vlag to exceed eevdf's limit.
>
> In order to ensure that vlag does not exceed the limit,
> separate the calculation of limit from the update_entity_lag
> and create a new func limit_entity_lag. When the vlag is
> updated, use this func to prevent vlag from exceeding the limit.
>
> And add check whether the se is forked in place_entity, and calc
> the curr's vlag to update the se's vlag.
>
> Signed-off-by: Xuewen Yan <[email protected]>
> ---
> kernel/sched/fair.c | 28 +++++++++++++++++++++++++---
> 1 file changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 533547e3c90a..3fc99b4b8b41 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -696,15 +696,22 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
> *
> * XXX could add max_slice to the augmented data to track this.
> */
> +static s64 limit_entity_lag(struct sched_entity *se, s64 lag)
> +{
> + s64 limit;
> +
> + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> + return clamp(lag, -limit, limit);
> +}

I used to run with a trace_printk() in there to check how often this
trips, can you readily trigger these sites, or do you have to work hard
at it?


> static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
> - s64 lag, limit;
> + s64 lag;
>
> SCHED_WARN_ON(!se->on_rq);
> lag = avg_vruntime(cfs_rq) - se->vruntime;
>
> - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> - se->vlag = clamp(lag, -limit, limit);
> + se->vlag = limit_entity_lag(se, lag);
> }
>
> /*
> @@ -3757,6 +3764,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
> if (avruntime != se->vruntime) {
> vlag = (s64)(avruntime - se->vruntime);
> vlag = div_s64(vlag * old_weight, weight);
> + vlag = limit_entity_lag(se, vlag);
> se->vruntime = avruntime - vlag;
> }
>
> @@ -3804,6 +3812,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
>
> update_load_set(&se->load, weight);
>
> + if (!se->on_rq)
> + se-vlag = limit_entity_lag(se, se->vlag);
> +
> #ifdef CONFIG_SMP
> do {
> u32 divider = get_pelt_divider(&se->avg);

> @@ -5237,6 +5258,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> if (WARN_ON_ONCE(!load))
> load = 1;
> lag = div_s64(lag, load);
> + lag = limit_entity_lag(se, lag);
> }
>
> se->vruntime = vruntime - lag;
> --
> 2.25.1
>

2024-04-18 02:51:11

by Xuewen Yan

[permalink] [raw]
Subject: Re: [PATCH] sched/eevdf: Prevent vlag from exceeding the limit value

Hi Peter,

Sorry for the late reply.

On Mon, Apr 8, 2024 at 8:06 PM Peter Zijlstra <[email protected]> wrote:
>
> On Tue, Jan 30, 2024 at 04:06:43PM +0800, Xuewen Yan wrote:
> > There are some scenarios here that will cause vlag to
> > exceed eevdf's limit.
> >
> > 1. In set_next_entity, it will stash a copy of deadline
> > at the point of pick in vlag, and if the task fork child,
> > the child's vlag would inherit parent's vlag, as a result,
> > the child's vlag is equal to the deadline. And then,
> > when place_entity, because the se's vlag is wrong, it would
> > cause vruntime and deadline update error.
>
> But __sched_fork() clears it? Or am I missing something subtle?

Yes, you are right, I missed the __sched_fork().

>
> > 2. When reweight_entity, the vlag would be recalculated,
> > because the new weights are uncertain, and it may cause
> > the vlag to exceed eevdf's limit.
> >
> > In order to ensure that vlag does not exceed the limit,
> > separate the calculation of limit from the update_entity_lag
> > and create a new func limit_entity_lag. When the vlag is
> > updated, use this func to prevent vlag from exceeding the limit.
> >
> > And add check whether the se is forked in place_entity, and calc
> > the curr's vlag to update the se's vlag.
> >
> > Signed-off-by: Xuewen Yan <[email protected]>
> > ---
> > kernel/sched/fair.c | 28 +++++++++++++++++++++++++---
> > 1 file changed, 25 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 533547e3c90a..3fc99b4b8b41 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -696,15 +696,22 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
> > *
> > * XXX could add max_slice to the augmented data to track this.
> > */
> > +static s64 limit_entity_lag(struct sched_entity *se, s64 lag)
> > +{
> > + s64 limit;
> > +
> > + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> > + return clamp(lag, -limit, limit);
> > +}
>
> I used to run with a trace_printk() in there to check how often this
> trips, can you readily trigger these sites, or do you have to work hard
> at it?

I can trigger this easily. And I add dump_stack, the case is much:

[ 11.372967] Call trace:
[ 11.372971] dump_backtrace+0xec/0x138
[ 11.372990] show_stack+0x18/0x24
[ 11.372995] dump_stack_lvl+0x60/0x84
[ 11.373002] dump_stack+0x18/0x24
[ 11.373007] place_entity+0x210/0x25c
[ 11.373015] enqueue_task_fair+0x1e0/0x658
[ 11.373021] enqueue_task+0x94/0x188
[ 11.373028] ttwu_do_activate+0xb4/0x27c
[ 11.373034] try_to_wake_up+0x2fc/0x800
[ 11.373040] wake_up_q+0x70/0xd8
[ 11.373046] futex_wake+0x1e4/0x32c
[ 11.373052] do_futex+0x144/0x27c
[ 11.373056] __arm64_sys_futex_time32+0xa8/0x1b8
[ 11.373061] invoke_syscall+0x58/0x114
[ 11.373068] el0_svc_common+0xac/0xe0
[ 11.373074] do_el0_svc_compat+0x1c/0x28
[ 11.373079] el0_svc_compat+0x38/0x6c
[ 11.373085] el0t_32_sync_handler+0x60/0x8c
[ 11.373091] el0t_32_sync+0x1ac/0x1b0
[ 11.373097] the lag=4562177811 vruntime=18058609737 limit=3071999998

[ 38.778444] Call trace:
[ 38.778447] dump_backtrace+0xec/0x138
[ 38.778465] show_stack+0x18/0x24
[ 38.778470] dump_stack_lvl+0x60/0x84
[ 38.778479] dump_stack+0x18/0x24
[ 38.778484] place_entity+0x210/0x25c
[ 38.778492] enqueue_task_fair+0x1e0/0x658
[ 38.778498] enqueue_task+0x94/0x188
[ 38.778505] ttwu_do_activate+0xb4/0x27c
[ 38.778512] try_to_wake_up+0x2fc/0x800
[ 38.778518] wake_up_process+0x18/0x28
[ 38.778523] hrtimer_wakeup+0x20/0x94
[ 38.778530] __hrtimer_run_queues+0x19c/0x39c
[ 38.778536] hrtimer_interrupt+0xdc/0x39c
[ 38.778541] arch_timer_handler_phys+0x50/0x64
[ 38.778549] handle_percpu_devid_irq+0xb4/0x258
[ 38.778556] generic_handle_domain_irq+0x44/0x60
[ 38.778563] gic_handle_irq+0x4c/0x114
[ 38.778568] call_on_irq_stack+0x3c/0x74
[ 38.778573] do_interrupt_handler+0x4c/0x84
[ 38.778580] el0_interrupt+0x54/0xe4
[ 38.778586] __el0_irq_handler_common+0x18/0x28
[ 38.778591] el0t_64_irq_handler+0x10/0x1c
[ 38.778596] el0t_64_irq+0x1a8/0x1ac
[ 38.778602] the lag=-993821948 vruntime=162909998203 limit=877714284

[ 41.205507] Call trace:
[ 41.205508] dump_backtrace+0xec/0x138
[ 41.205516] show_stack+0x18/0x24
[ 41.205518] dump_stack_lvl+0x60/0x84
[ 41.205523] dump_stack+0x18/0x24
[ 41.205526] place_entity+0x210/0x25c
[ 41.205530] enqueue_task_fair+0x1e0/0x658
[ 41.205533] enqueue_task+0x94/0x188
[ 41.205537] ttwu_do_activate+0xb4/0x27c
[ 41.205540] try_to_wake_up+0x2fc/0x800
[ 41.205543] default_wake_function+0x20/0x38
[ 41.205547] autoremove_wake_function+0x18/0x7c
[ 41.205551] receiver_wake_function+0x28/0x38
[ 41.205555] __wake_up_common+0xe8/0x188
[ 41.205558] __wake_up_sync_key+0x88/0x144
[ 41.205561] sock_def_readable+0xb4/0x1ac
[ 41.205565] unix_dgram_sendmsg+0x5b4/0x764
[ 41.205569] sock_write_iter+0xe0/0x160
[ 41.205571] do_iter_write+0x210/0x338
[ 41.205575] do_writev+0xd8/0x198
[ 41.205577] __arm64_sys_writev+0x20/0x30
[ 41.205581] invoke_syscall+0x58/0x114
[ 41.205584] el0_svc_common+0x80/0xe0
[ 41.205588] do_el0_svc+0x1c/0x28
[ 41.205591] el0_svc+0x3c/0x70
[ 41.205594] el0t_64_sync_handler+0x68/0xbc
[ 41.205597] el0t_64_sync+0x1a8/0x1ac
[ 41.205599] the lag=-7374172458 vruntime=209576555829 limit=3071999998

[ 41.416780] Call trace:
[ 41.416781] dump_backtrace+0xec/0x138
[ 41.416788] show_stack+0x18/0x24
[ 41.416791] dump_stack_lvl+0x60/0x84
[ 41.416797] dump_stack+0x18/0x24
[ 41.416799] place_entity+0x210/0x25c
[ 41.416804] enqueue_task_fair+0x1e0/0x658
[ 41.416807] enqueue_task+0x94/0x188
[ 41.416811] ttwu_do_activate+0xb4/0x27c
[ 41.416814] try_to_wake_up+0x2fc/0x800
[ 41.416818] default_wake_function+0x20/0x38
[ 41.416821] autoremove_wake_function+0x18/0x7c
[ 41.416825] __wake_up_common+0xe8/0x188
[ 41.416828] __wake_up_sync_key+0x88/0x144
[ 41.416831] __wake_up_sync+0x14/0x24
[ 41.416834] binder_transaction+0x1794/0x1e98
[ 41.416838] binder_ioctl_write_read+0x590/0x3950
[ 41.416841] binder_ioctl+0x1ec/0xa40
[ 41.416844] __arm64_sys_ioctl+0xa8/0xe4
[ 41.416849] invoke_syscall+0x58/0x114
[ 41.416853] el0_svc_common+0x80/0xe0
[ 41.416857] do_el0_svc+0x1c/0x28
[ 41.416860] el0_svc+0x3c/0x70
[ 41.416868] the lag=3625583753 vruntime=254849351272 limit=3071999998
[ 41.416863] el0t_64_sync_handler+0x68/0xbc
[ 41.416866] el0t_64_sync+0x1a8/0x1ac

Thanks!
BR

---
xuewen

>
>
> > static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
> > {
> > - s64 lag, limit;
> > + s64 lag;
> >
> > SCHED_WARN_ON(!se->on_rq);
> > lag = avg_vruntime(cfs_rq) - se->vruntime;
> >
> > - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
> > - se->vlag = clamp(lag, -limit, limit);
> > + se->vlag = limit_entity_lag(se, lag);
> > }
> >
> > /*
> > @@ -3757,6 +3764,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
> > if (avruntime != se->vruntime) {
> > vlag = (s64)(avruntime - se->vruntime);
> > vlag = div_s64(vlag * old_weight, weight);
> > + vlag = limit_entity_lag(se, vlag);
> > se->vruntime = avruntime - vlag;
> > }
> >
> > @@ -3804,6 +3812,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
> >
> > update_load_set(&se->load, weight);
> >
> > + if (!se->on_rq)
> > + se-vlag = limit_entity_lag(se, se->vlag);
> > +
> > #ifdef CONFIG_SMP
> > do {
> > u32 divider = get_pelt_divider(&se->avg);
>
> > @@ -5237,6 +5258,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> > if (WARN_ON_ONCE(!load))
> > load = 1;
> > lag = div_s64(lag, load);
> > + lag = limit_entity_lag(se, lag);
> > }
> >
> > se->vruntime = vruntime - lag;
> > --
> > 2.25.1
> >