update_current_exec_runtime() updates the execution time for each task;
we can use update_current_exec_runtime() to simplify the code.
The 1st and 2nd patches update update_current_exec_runtime() so the change
applies to all callers.
The 3rd patch uses update_current_exec_runtime() to simplify update_curr().
Yajun Deng (3):
sched: Don't account execution time for task group
sched: Don't trace stat runtime for task group
sched/fair: Simplify update_curr()
kernel/sched/deadline.c | 4 +---
kernel/sched/fair.c | 13 +++----------
kernel/sched/rt.c | 5 ++---
kernel/sched/sched.h | 11 +++++++----
kernel/sched/stop_task.c | 2 +-
5 files changed, 14 insertions(+), 21 deletions(-)
--
2.25.1
The rt entity can be a task group. We will account execution time for
each task. For consistency, we don't need to account execution time for
a task group.
Pass a parameter to update_current_exec_runtime() and let the caller
decide whether to account execution time.
Signed-off-by: Yajun Deng <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
---
v3: Pass a parameter to update_current_exec_runtime.
v2: Add the missing '#endif'.
v1: https://lore.kernel.org/all/[email protected]/
---
kernel/sched/deadline.c | 2 +-
kernel/sched/rt.c | 3 ++-
kernel/sched/sched.h | 10 ++++++----
kernel/sched/stop_task.c | 2 +-
4 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b28114478b82..a9f84428c4b5 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1303,7 +1303,7 @@ static void update_curr_dl(struct rq *rq)
trace_sched_stat_runtime(curr, delta_exec, 0);
- update_current_exec_runtime(curr, now, delta_exec);
+ update_current_exec_runtime(curr, now, delta_exec, true);
if (dl_entity_is_special(dl_se))
return;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6aaf0a3d6081..79cf80d73822 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1018,7 +1018,8 @@ static void update_curr_rt(struct rq *rq)
trace_sched_stat_runtime(curr, delta_exec, 0);
- update_current_exec_runtime(curr, now, delta_exec);
+ update_current_exec_runtime(curr, now, delta_exec,
+ rt_entity_is_task(rt_se));
if (!rt_bandwidth_enabled())
return;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2e5a95486a42..6f0169d9b306 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3262,13 +3262,15 @@ extern void sched_dynamic_update(int mode);
#endif
static inline void update_current_exec_runtime(struct task_struct *curr,
- u64 now, u64 delta_exec)
+ u64 now, u64 delta_exec, bool task)
{
curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
curr->se.exec_start = now;
- cgroup_account_cputime(curr, delta_exec);
+
+ if (task) {
+ account_group_exec_runtime(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
+ }
}
#ifdef CONFIG_SCHED_MM_CID
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 6cf7304e6449..1bec2af7ce8d 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -81,7 +81,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
schedstat_set(curr->stats.exec_max,
max(curr->stats.exec_max, delta_exec));
- update_current_exec_runtime(curr, now, delta_exec);
+ update_current_exec_runtime(curr, now, delta_exec, true);
}
/*
--
2.25.1
The rt entity can be a task group. We will trace stat runtime for each
task, so we don't need to trace stat runtime for a task group.
Move trace_sched_stat_runtime() into update_current_exec_runtime().
Signed-off-by: Yajun Deng <[email protected]>
---
v3: New patch.
---
kernel/sched/deadline.c | 2 --
kernel/sched/rt.c | 2 --
kernel/sched/sched.h | 1 +
3 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a9f84428c4b5..70b5c5b47106 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1301,8 +1301,6 @@ static void update_curr_dl(struct rq *rq)
schedstat_set(curr->stats.exec_max,
max(curr->stats.exec_max, delta_exec));
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
update_current_exec_runtime(curr, now, delta_exec, true);
if (dl_entity_is_special(dl_se))
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 79cf80d73822..1e155c7658ae 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1016,8 +1016,6 @@ static void update_curr_rt(struct rq *rq)
schedstat_set(curr->stats.exec_max,
max(curr->stats.exec_max, delta_exec));
- trace_sched_stat_runtime(curr, delta_exec, 0);
-
update_current_exec_runtime(curr, now, delta_exec,
rt_entity_is_task(rt_se));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6f0169d9b306..f7014e19bc0a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3268,6 +3268,7 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
curr->se.exec_start = now;
if (task) {
+ trace_sched_stat_runtime(curr, delta_exec, curr->se.vruntime);
account_group_exec_runtime(curr, delta_exec);
cgroup_account_cputime(curr, delta_exec);
}
--
2.25.1
Use update_current_exec_runtime() to simplify update_curr().
Signed-off-by: Yajun Deng <[email protected]>
---
v3: New patch.
---
kernel/sched/fair.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2048138ce54b..ffc000e49db5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1135,6 +1135,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
+ struct task_struct *curtask = rq_of(cfs_rq)->curr;
u64 now = rq_clock_task(rq_of(cfs_rq));
u64 delta_exec;
@@ -1145,8 +1146,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely((s64)delta_exec <= 0))
return;
- curr->exec_start = now;
-
if (schedstat_enabled()) {
struct sched_statistics *stats;
@@ -1155,20 +1154,14 @@ static void update_curr(struct cfs_rq *cfs_rq)
max(delta_exec, stats->exec_max));
}
- curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq->exec_clock, delta_exec);
curr->vruntime += calc_delta_fair(delta_exec, curr);
update_deadline(cfs_rq, curr);
update_min_vruntime(cfs_rq);
- if (entity_is_task(curr)) {
- struct task_struct *curtask = task_of(curr);
-
- trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
- cgroup_account_cputime(curtask, delta_exec);
- account_group_exec_runtime(curtask, delta_exec);
- }
+ update_current_exec_runtime(curtask, now, delta_exec,
+ entity_is_task(curr));
account_cfs_rq_runtime(cfs_rq, delta_exec);
}
--
2.25.1
On Tue, Oct 31, 2023 at 08:59:25PM +0800, Yajun Deng wrote:
> update_current_exec_runtime would update execution time for each task,
> we can use update_current_exec_runtime simplify code.
>
> The 1st and 2nd patch update update_current_exec_runtime() applies to all
> callers.
> The 3rd patch use update_current_exec_runtime simplify update_curr.
>
> Yajun Deng (3):
> sched: Don't account execution time for task group
> sched: Don't trace stat runtime for task group
> sched/fair: Simplify update_curr()
>
> kernel/sched/deadline.c | 4 +---
> kernel/sched/fair.c | 13 +++----------
> kernel/sched/rt.c | 5 ++---
> kernel/sched/sched.h | 11 +++++++----
> kernel/sched/stop_task.c | 2 +-
> 5 files changed, 14 insertions(+), 21 deletions(-)
Hurmph, so I'm having conflicts against this:
https://lkml.kernel.org/r/54d148a144f26d9559698c4dd82d8859038a7380.1699095159.git.bristot@kernel.org
(obviously).. I've resolved the first patch, which also mostly includes
the second patch.
However, your second patch isn't entirely right, it now unconditionally
traces ->vruntime, which isn't the same. Imagine a regular task getting
a PI boost to RT, in that case ->vruntime will be non-zero and the RT
task will now be logging a vruntime.
Anyway, that tracepoint doesn't really make sense to me anyway, that is,
it logs a delta_exec and an absolute vruntime, that's inconsistent.
Also, a delta vruntime can be easily computed because the weight should
be known.
I think I'm going to simply remove the vruntime from that tracepoint and
avoid the whole problem.
This then also makes resolving patch 3 easier.
Let me go squish all this and then I'll post a link to whatever came
out.
On Tue, Oct 31, 2023 at 08:59:26PM +0800, Yajun Deng wrote:
> The rt entity can be a task group. We will account execution time for
> each task. For consistency, we don't need to account execution time for
> task group.
>
> Pass a parameter to update_current_exec_runtime, let the caller decide
> whether account execution time.
>
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 6aaf0a3d6081..79cf80d73822 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1018,7 +1018,8 @@ static void update_curr_rt(struct rq *rq)
>
> trace_sched_stat_runtime(curr, delta_exec, 0);
>
> - update_current_exec_runtime(curr, now, delta_exec);
> + update_current_exec_runtime(curr, now, delta_exec,
> + rt_entity_is_task(rt_se));
>
> if (!rt_bandwidth_enabled())
> return;
ok, I think I've managed to confuse myself again.
But at this point rt_se := &rq->curr->rt, which is *always* a task, no?
On 2023/11/6 20:35, Peter Zijlstra wrote:
> On Tue, Oct 31, 2023 at 08:59:25PM +0800, Yajun Deng wrote:
>> update_current_exec_runtime would update execution time for each task,
>> we can use update_current_exec_runtime simplify code.
>>
>> The 1st and 2nd patch update update_current_exec_runtime() applies to all
>> callers.
>> The 3rd patch use update_current_exec_runtime simplify update_curr.
>>
>> Yajun Deng (3):
>> sched: Don't account execution time for task group
>> sched: Don't trace stat runtime for task group
>> sched/fair: Simplify update_curr()
>>
>> kernel/sched/deadline.c | 4 +---
>> kernel/sched/fair.c | 13 +++----------
>> kernel/sched/rt.c | 5 ++---
>> kernel/sched/sched.h | 11 +++++++----
>> kernel/sched/stop_task.c | 2 +-
>> 5 files changed, 14 insertions(+), 21 deletions(-)
>
> Hurmph, so I'm having conflicts against this:
>
> https://lkml.kernel.org/r/54d148a144f26d9559698c4dd82d8859038a7380.1699095159.git.bristot@kernel.org
>
> (obviously).. I've resolved the first patch, which also mostly includes
> the second patch.
>
> However, your second patch isn't entirely right, it now unconditionally
> traces ->vruntime, which isn't the same. Imagine a regular task getting
> a PI boost to RT, in that case ->vruntime will be non-zero and the RT
> task will now be logging a vruntime.
>
> Anyway, that tracepoint doesn't really make sense to me anyway, that is,
> it logs a delta_exec and an absolute vruntime, that's inconsistent.
> Also, a delta vruntime can be easily computed because the weight should
> be known.
>
> I think I'm going to simply remove the vruntime from that tracepoint and
> avoid the whole problem.
>
> This then also makes resolving patch 3 easier.
>
> Let me go squish all this and then I'll post a link to whatever came
> out.
Got it, thanks!
On 2023/11/6 20:49, Peter Zijlstra wrote:
> On Tue, Oct 31, 2023 at 08:59:26PM +0800, Yajun Deng wrote:
>> The rt entity can be a task group. We will account execution time for
>> each task. For consistency, we don't need to account execution time for
>> task group.
>>
>> Pass a parameter to update_current_exec_runtime, let the caller decide
>> whether account execution time.
>>
>> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>> index 6aaf0a3d6081..79cf80d73822 100644
>> --- a/kernel/sched/rt.c
>> +++ b/kernel/sched/rt.c
>> @@ -1018,7 +1018,8 @@ static void update_curr_rt(struct rq *rq)
>>
>> trace_sched_stat_runtime(curr, delta_exec, 0);
>>
>> - update_current_exec_runtime(curr, now, delta_exec);
>> + update_current_exec_runtime(curr, now, delta_exec,
>> + rt_entity_is_task(rt_se));
>>
>> if (!rt_bandwidth_enabled())
>> return;
> ok, I think I've managed to confuse myself again.
>
> But at this point rt_se := &rq->curr->rt, which is *always* a task, no?
I think so, but it seems safer to use rt_entity_is_task().