Date: 2023-11-21 06:47:07
From: Ankur Arora

Subject: Re: [RFC PATCH 42/86] sched: force preemption on tick expiration


Peter Zijlstra <[email protected]> writes:

> On Tue, Nov 07, 2023 at 01:57:28PM -0800, Ankur Arora wrote:
>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 4d86c618ffa2..fe7e5e9b2207 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>>   * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>>   * this is probably good enough.
>>   */
>> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>> +static void update_deadline(struct cfs_rq *cfs_rq,
>> +			    struct sched_entity *se, bool tick)
>>  {
>> +	struct rq *rq = rq_of(cfs_rq);
>> +
>>  	if ((s64)(se->vruntime - se->deadline) < 0)
>>  		return;
>>
>> @@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>  	 */
>>  	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
>>
>> +	if (cfs_rq->nr_running < 2)
>> +		return;
>> +
>>  	/*
>> -	 * The task has consumed its request, reschedule.
>> +	 * The task has consumed its request, reschedule; eagerly
>> +	 * if it ignored our last lazy reschedule.
>>  	 */
>> -	if (cfs_rq->nr_running > 1) {
>> -		resched_curr(rq_of(cfs_rq));
>> -		clear_buddies(cfs_rq, se);
>> -	}
>> +	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
>> +		__resched_curr(rq, RESCHED_eager);
>> +	else
>> +		resched_curr(rq);
>> +
>> +	clear_buddies(cfs_rq, se);
>>  }
>>
>>  #include "pelt.h"
>> @@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
>>  /*
>>   * Update the current task's runtime statistics.
>>   */
>> -static void update_curr(struct cfs_rq *cfs_rq)
>> +static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
>>  {
>>  	struct sched_entity *curr = cfs_rq->curr;
>>  	u64 now = rq_clock_task(rq_of(cfs_rq));
>> @@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	schedstat_add(cfs_rq->exec_clock, delta_exec);
>>
>>  	curr->vruntime += calc_delta_fair(delta_exec, curr);
>> -	update_deadline(cfs_rq, curr);
>> +	update_deadline(cfs_rq, curr, tick);
>>  	update_min_vruntime(cfs_rq);
>>
>>  	if (entity_is_task(curr)) {
>> @@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	account_cfs_rq_runtime(cfs_rq, delta_exec);
>>  }
>>
>> +static void update_curr(struct cfs_rq *cfs_rq)
>> +{
>> +	__update_curr(cfs_rq, false);
>> +}
>> +
>>  static void update_curr_fair(struct rq *rq)
>>  {
>>  	update_curr(cfs_rq_of(&rq->curr->se));
>> @@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>>  	/*
>>  	 * Update run-time statistics of the 'current'.
>>  	 */
>> -	update_curr(cfs_rq);
>> +	__update_curr(cfs_rq, true);
>>
>>  	/*
>>  	 * Ensure that runnable average is periodically updated.
>
> I'm thinking this will be less of a mess if you flip it around some.
>
> (ignore the hrtick mess, I'll try and get that cleaned up)
>
> This way you have two distinct sites to handle the preemption: the
> update_curr() one would be 'FULL ? force : lazy' while the tick one gets
> the special magic bits.

Thanks, that simplified the changes here quite nicely.
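
For reference, the flipped structure ends up looking roughly like the
below. This is a sketch only, not the exact code; it assumes, as
elsewhere in this series, that resched_curr() resolves to a lazy
reschedule unless running under full preemption, and that
__resched_curr()/RESCHED_eager are as introduced earlier in the series:

/*
 * Non-tick site: update_deadline() handles slice expiry itself, so the
 * tick parameter threaded through __update_curr() goes away.
 */
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if ((s64)(se->vruntime - se->deadline) < 0)
		return;

	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);

	if (cfs_rq->nr_running > 1) {
		resched_curr(rq_of(cfs_rq));	/* FULL ? force : lazy */
		clear_buddies(cfs_rq, se);
	}
}

/*
 * Tick site: only the escalation magic lives here. If the lazy
 * reschedule set earlier was ignored, upgrade it to an eager one.
 */
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	struct rq *rq = rq_of(cfs_rq);

	update_curr(cfs_rq);

	if (test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
		__resched_curr(rq, RESCHED_eager);
	...
}

That keeps the tick path as the only place that knows about escalation.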

--
ankur