Subject: [PATCH v4 1/7] sched: Unify runtime accounting across classes

From: Peter Zijlstra <[email protected]>

All classes use sched_entity::exec_start to track runtime and have
copies of the exact same code around to compute runtime.

Collapse all that.

Reviewed-by: Phil Auld <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
include/linux/sched.h | 2 +-
kernel/sched/deadline.c | 15 +++--------
kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++----------
kernel/sched/rt.c | 15 +++--------
kernel/sched/sched.h | 12 ++-------
kernel/sched/stop_task.c | 13 +--------
6 files changed, 53 insertions(+), 61 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 177b3f3676ef..639f6eb9bd4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -520,7 +520,7 @@ struct sched_statistics {
u64 block_max;
s64 sum_block_runtime;

- u64 exec_max;
+ s64 exec_max;
u64 slice_max;

u64 nr_migrations_cold;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 58b542bf2893..9a09d9dafd88 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1299,9 +1299,8 @@ static void update_curr_dl(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_dl_entity *dl_se = &curr->dl;
- u64 delta_exec, scaled_delta_exec;
+ s64 delta_exec, scaled_delta_exec;
int cpu = cpu_of(rq);
- u64 now;

if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
@@ -1314,21 +1313,13 @@ static void update_curr_dl(struct rq *rq)
* natural solution, but the full ramifications of this
* approach need further study.
*/
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0)) {
+ delta_exec = update_curr_common(rq);
+ if (unlikely(delta_exec <= 0)) {
if (unlikely(dl_se->dl_yielded))
goto throttle;
return;
}

- schedstat_set(curr->stats.exec_max,
- max(curr->stats.exec_max, delta_exec));
-
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
- update_current_exec_runtime(curr, now, delta_exec);
-
if (dl_entity_is_special(dl_se))
return;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 911d0063763c..52c8219623b1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1092,23 +1092,17 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */

-/*
- * Update the current task's runtime statistics.
- */
-static void update_curr(struct cfs_rq *cfs_rq)
+static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
{
- struct sched_entity *curr = cfs_rq->curr;
- u64 now = rq_clock_task(rq_of(cfs_rq));
- u64 delta_exec;
-
- if (unlikely(!curr))
- return;
+ u64 now = rq_clock_task(rq);
+ s64 delta_exec;

delta_exec = now - curr->exec_start;
- if (unlikely((s64)delta_exec <= 0))
- return;
+ if (unlikely(delta_exec <= 0))
+ return delta_exec;

curr->exec_start = now;
+ curr->sum_exec_runtime += delta_exec;

if (schedstat_enabled()) {
struct sched_statistics *stats;
@@ -1118,8 +1112,43 @@ static void update_curr(struct cfs_rq *cfs_rq)
max(delta_exec, stats->exec_max));
}

- curr->sum_exec_runtime += delta_exec;
- schedstat_add(cfs_rq->exec_clock, delta_exec);
+ return delta_exec;
+}
+
+/*
+ * Used by other classes to account runtime.
+ */
+s64 update_curr_common(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ s64 delta_exec;
+
+ delta_exec = update_curr_se(rq, &curr->se);
+ if (unlikely(delta_exec <= 0))
+ return delta_exec;
+
+ trace_sched_stat_runtime(curr, delta_exec, 0);
+
+ account_group_exec_runtime(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
+
+ return delta_exec;
+}
+
+/*
+ * Update the current task's runtime statistics.
+ */
+static void update_curr(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ s64 delta_exec;
+
+ if (unlikely(!curr))
+ return;
+
+ delta_exec = update_curr_se(rq_of(cfs_rq), curr);
+ if (unlikely(delta_exec <= 0))
+ return;

curr->vruntime += calc_delta_fair(delta_exec, curr);
update_deadline(cfs_rq, curr);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0597ba0f85ff..e23cc67c9467 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1046,24 +1046,15 @@ static void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
- u64 delta_exec;
- u64 now;
+ s64 delta_exec;

if (curr->sched_class != &rt_sched_class)
return;

- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0))
+ delta_exec = update_curr_common(rq);
+ if (unlikely(delta_exec <= 0))
return;

- schedstat_set(curr->stats.exec_max,
- max(curr->stats.exec_max, delta_exec));
-
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
- update_current_exec_runtime(curr, now, delta_exec);
-
if (!rt_bandwidth_enabled())
return;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 04846272409c..1def5b7fa1df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2228,6 +2228,8 @@ struct affinity_context {
unsigned int flags;
};

+extern s64 update_curr_common(struct rq *rq);
+
struct sched_class {

#ifdef CONFIG_UCLAMP_TASK
@@ -3280,16 +3282,6 @@ extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif

-static inline void update_current_exec_runtime(struct task_struct *curr,
- u64 now, u64 delta_exec)
-{
- curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
- curr->se.exec_start = now;
- cgroup_account_cputime(curr, delta_exec);
-}
-
#ifdef CONFIG_SCHED_MM_CID

#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 85590599b4d6..7595494ceb6d 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -70,18 +70,7 @@ static void yield_task_stop(struct rq *rq)

static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
- struct task_struct *curr = rq->curr;
- u64 now, delta_exec;
-
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec < 0))
- delta_exec = 0;
-
- schedstat_set(curr->stats.exec_max,
- max(curr->stats.exec_max, delta_exec));
-
- update_current_exec_runtime(curr, now, delta_exec);
+ update_curr_common(rq);
}

/*
--
2.40.1



2023-09-15 21:58:22

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v4 1/7] sched: Unify runtime accounting across classes

On Thu, 31 Aug 2023 22:28:52 +0200
Daniel Bristot de Oliveira <[email protected]> wrote:

As I have a vested interest in this work, I started a deep dive into the
code.

> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1299,9 +1299,8 @@ static void update_curr_dl(struct rq *rq)
> {
> struct task_struct *curr = rq->curr;
> struct sched_dl_entity *dl_se = &curr->dl;
> - u64 delta_exec, scaled_delta_exec;
> + s64 delta_exec, scaled_delta_exec;
> int cpu = cpu_of(rq);
> - u64 now;
>
> if (!dl_task(curr) || !on_dl_rq(dl_se))
> return;
> @@ -1314,21 +1313,13 @@ static void update_curr_dl(struct rq *rq)
> * natural solution, but the full ramifications of this
> * approach need further study.
> */
> - now = rq_clock_task(rq);
> - delta_exec = now - curr->se.exec_start;
> - if (unlikely((s64)delta_exec <= 0)) {
> + delta_exec = update_curr_common(rq);

I have to say, mapping the update_curr_common() to the removed code below,
wasn't as easy as I thought it would be, as the above function is broken up
slightly differently.

But the conclusion does appear to be pretty much the same.

Reviewed-by: Steven Rostedt (Google) <[email protected]>

-- Steve


> + if (unlikely(delta_exec <= 0)) {
> if (unlikely(dl_se->dl_yielded))
> goto throttle;
> return;
> }
>
> - schedstat_set(curr->stats.exec_max,
> - max(curr->stats.exec_max, delta_exec));
> -
> - trace_sched_stat_runtime(curr, delta_exec, 0);
> -
> - update_current_exec_runtime(curr, now, delta_exec);
> -
> if (dl_entity_is_special(dl_se))
> return;
>