The following statistics are added to the cpu.stat file to show how much a
workload is making use of cfs_b burst:
nr_bursts: number of periods in which a bandwidth burst occurs
burst_usec: cumulative wall-time that any CPUs have
used above quota in the respective periods
The larger nr_bursts is, the more bursty periods there are. And the larger
burst_usec is, the more burst time is used by bursty workload.
Co-developed-by: Shanpei Chen <[email protected]>
Signed-off-by: Shanpei Chen <[email protected]>
Co-developed-by: Tianchen Ding <[email protected]>
Signed-off-by: Tianchen Ding <[email protected]>
Signed-off-by: Huaixin Chang <[email protected]>
---
kernel/sched/core.c | 13 ++++++++++---
kernel/sched/fair.c | 11 +++++++++++
kernel/sched/sched.h | 3 +++
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b58ced2194a0..1e41c51b14b5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9265,6 +9265,9 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
seq_printf(sf, "wait_sum %llu\n", ws);
}
+ seq_printf(sf, "nr_bursts %d\n", cfs_b->nr_burst);
+ seq_printf(sf, "burst_usec %llu\n", cfs_b->burst_time);
+
return 0;
}
#endif /* CONFIG_CFS_BANDWIDTH */
@@ -9361,16 +9364,20 @@ static int cpu_extra_stat_show(struct seq_file *sf,
{
struct task_group *tg = css_tg(css);
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
- u64 throttled_usec;
+ u64 throttled_usec, burst_usec;
throttled_usec = cfs_b->throttled_time;
do_div(throttled_usec, NSEC_PER_USEC);
+ burst_usec = cfs_b->burst_time;
+ do_div(burst_usec, NSEC_PER_USEC);
seq_printf(sf, "nr_periods %d\n"
"nr_throttled %d\n"
- "throttled_usec %llu\n",
+ "throttled_usec %llu\n"
+ "nr_bursts %d\n"
+ "burst_usec %llu\n",
cfs_b->nr_periods, cfs_b->nr_throttled,
- throttled_usec);
+ throttled_usec, cfs_b->nr_burst, burst_usec);
}
#endif
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 53d7cc4d009b..62b73722e510 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4634,11 +4634,22 @@ static inline u64 sched_cfs_bandwidth_slice(void)
*/
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
{
+ u64 runtime;
+
if (unlikely(cfs_b->quota == RUNTIME_INF))
return;
+ if (cfs_b->runtime_at_period_start > cfs_b->runtime) {
+ runtime = cfs_b->runtime_at_period_start - cfs_b->runtime;
+ if (runtime > cfs_b->quota) {
+ cfs_b->burst_time += runtime - cfs_b->quota;
+ cfs_b->nr_burst++;
+ }
+ }
+
cfs_b->runtime += cfs_b->quota;
cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
+ cfs_b->runtime_at_period_start = cfs_b->runtime;
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d317ca74a48c..b770b553dfbb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -367,6 +367,7 @@ struct cfs_bandwidth {
u64 quota;
u64 runtime;
u64 burst;
+ u64 runtime_at_period_start;
s64 hierarchical_quota;
u8 idle;
@@ -379,7 +380,9 @@ struct cfs_bandwidth {
/* Statistics: */
int nr_periods;
int nr_throttled;
+ int nr_burst;
u64 throttled_time;
+ u64 burst_time;
#endif
};
--
2.14.4.44.g2045bb6
> On Jun 28, 2021, at 11:00 PM, Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Jun 21, 2021 at 05:27:59PM +0800, Huaixin Chang wrote:
>> The following statistics in cpu.stat file is added to show how much workload
>> is making use of cfs_b burst:
>>
>> nr_bursts: number of periods bandwidth burst occurs
>> burst_usec: cumulative wall-time that any cpus has
>> used above quota in respective periods
>>
>> The larger nr_bursts is, the more bursty periods there are. And the larger
>> burst_usec is, the more burst time is used by bursty workload.
>
> That's what it does, but fails to explain why. How is this number
> useful.
>
How about this?
The cfs_b burst feature avoids throttling by allowing bandwidth bursts. When using cfs_b
burst, users configure burst and check whether it helps by looking at workload latency and cfs_b interval
statistics like nr_throttled. Also, two new statistics are introduced to show the internals of the burst feature
and explain why burst helps or not:
nr_bursts: number of periods in which a bandwidth burst occurs
burst_usec: cumulative wall-time that any CPUs have
used above quota in the respective periods
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 53d7cc4d009b..62b73722e510 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -4634,11 +4634,22 @@ static inline u64 sched_cfs_bandwidth_slice(void)
>> */
>> void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
>> {
>> + u64 runtime;
>> +
>> if (unlikely(cfs_b->quota == RUNTIME_INF))
>> return;
>>
>> + if (cfs_b->runtime_at_period_start > cfs_b->runtime) {
>> + runtime = cfs_b->runtime_at_period_start - cfs_b->runtime;
>
> That comparison is the same as the subtraction; might as well write
> this:
>
>> + if (runtime > cfs_b->quota) {
>> + cfs_b->burst_time += runtime - cfs_b->quota;
>
> Same here.
>
>> + cfs_b->nr_burst++;
>> + }
>> + }
>
>
> Perhaps we can write that like:
>
> s64 runtime = cfs_b->runtime_snapshot - cfs_b->runtime;
> if (runtime > 0) {
> s64 burstime = runtime - cfs_q->quota;
> if (burstime > 0) {
> cfs_b->bust_time += bursttime;
> cfs_b->nr_bursts++;
> }
> }
>
> I was hoping we could get away with something simpler, like maybe:
>
Got it.
> u64 old_runtim = cfs_b->runtime;
>
> cfs_b->runtime += cfs_b->quota
> cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
>
> if (cfs_b->runtime - old_runtime > cfs_b->quota)
> cfs_b->nr_bursts++;
>
> Would that be good enough?
>
>
>> +
>> cfs_b->runtime += cfs_b->quota;
>> cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
>> + cfs_b->runtime_at_period_start = cfs_b->runtime;
>> }
>>
>> static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index d317ca74a48c..b770b553dfbb 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -367,6 +367,7 @@ struct cfs_bandwidth {
>> u64 quota;
>> u64 runtime;
>> u64 burst;
>> + u64 runtime_at_period_start;
>> s64 hierarchical_quota;
>
> As per the above, I don't really like that name, runtime_snapshot or
> perhaps runtime_snap is shorter and not less clear. But not having it at
> all would be even better.