2023-05-18 01:45:17

by Josh Don

Subject: [PATCH 1/2] sched: don't account throttle time for empty groups

It is easy for a cfs_rq to become throttled even when it has no enqueued
entities (for example, if we have just put_prev()'d the last runnable
task of the cfs_rq, and the cfs_rq is out of quota).

Avoid accounting this time towards total throttle time, since it
otherwise falsely inflates the stats.
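
Condensed, the invariant established by the diff below is (a summary of the
hunks, not additional code):

	/* Start the throttle clock only if the cfs_rq has runnable entities. */
	if (cfs_rq->nr_running)
		cfs_rq->throttled_clock = rq_clock(rq);

	/* An enqueue onto an already-throttled hierarchy starts the clock. */
	if (!cfs_rq->throttled_clock)
		cfs_rq->throttled_clock = rq_clock(rq);

	/* On unthrottle, account the window only if the clock was started. */
	if (cfs_rq->throttled_clock) {
		cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
		cfs_rq->throttled_clock = 0;
	}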

Signed-off-by: Josh Don <[email protected]>
---
kernel/sched/fair.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f4b8b906d30a..85c2c0c3cab6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4768,6 +4768,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
 	bool curr = cfs_rq->curr == se;
+	struct rq *rq = rq_of(cfs_rq);
 
 	/*
 	 * If we're the current task, we must renormalise before calling
@@ -4816,8 +4817,12 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (cfs_rq->nr_running == 1) {
 		check_enqueue_throttle(cfs_rq);
-		if (!throttled_hierarchy(cfs_rq))
+		if (!throttled_hierarchy(cfs_rq)) {
 			list_add_leaf_cfs_rq(cfs_rq);
+		} else {
+			if (!cfs_rq->throttled_clock)
+				cfs_rq->throttled_clock = rq_clock(rq);
+		}
 	}
 }
 
@@ -5423,7 +5428,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	 * throttled-list. rq->lock protects completion.
 	 */
 	cfs_rq->throttled = 1;
-	cfs_rq->throttled_clock = rq_clock(rq);
+	SCHED_WARN_ON(cfs_rq->throttled_clock);
+	if (cfs_rq->nr_running)
+		cfs_rq->throttled_clock = rq_clock(rq);
 	return true;
 }
 
@@ -5441,7 +5448,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	update_rq_clock(rq);
 
 	raw_spin_lock(&cfs_b->lock);
-	cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+	if (cfs_rq->throttled_clock) {
+		cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+		cfs_rq->throttled_clock = 0;
+	}
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);

--
2.40.1.606.ga4b1b128d6-goog



2023-05-18 01:46:55

by Josh Don

Subject: [PATCH 2/2] sched: add throttled time stat for throttled children

We currently export the total throttled time for cgroups that are given
a bandwidth limit. This patch extends that accounting to include the
total time that each child cgroup has been throttled.

This is useful for understanding the degree to which children have been
affected by the throttling control. Children that are not runnable
during the entire throttled period, for example, will not show any
self-throttling time for that period.
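
For illustration, a cgroup v2 cpu.stat would then read (values are
hypothetical):

	nr_periods 100
	nr_throttled 8
	throttled_usec 420000
	throttled_self_usec 150000
	nr_bursts 0
	burst_usec 0

For a child cgroup with no bandwidth limit of its own, the pre-existing
keys stay at zero, while throttled_self_usec accumulates whenever the
child was runnable inside a throttled ancestor.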

Signed-off-by: Josh Don <[email protected]>
---
kernel/sched/core.c | 21 +++++++++++++++++++--
kernel/sched/fair.c | 17 +++++++++++++++++
kernel/sched/sched.h | 2 ++
3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a384344ec9ee..fcd702889fcb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11064,6 +11064,18 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
 	return ret;
 }
 
+static u64 throttled_time_self(struct task_group *tg)
+{
+	int i;
+	u64 total = 0;
+
+	for_each_possible_cpu(i) {
+		total += READ_ONCE(tg->cfs_rq[i]->throttled_clock_self_time);
+	}
+
+	return total;
+}
+
 static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 {
 	struct task_group *tg = css_tg(seq_css(sf));
@@ -11072,6 +11084,7 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods);
 	seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
 	seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
+	seq_printf(sf, "throttled_time_self %llu\n", throttled_time_self(tg));
 
 	if (schedstat_enabled() && tg != &root_task_group) {
 		struct sched_statistics *stats;
@@ -11204,20 +11217,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
 	{
 		struct task_group *tg = css_tg(css);
 		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-		u64 throttled_usec, burst_usec;
+		u64 throttled_usec, burst_usec, throttled_self_usec;
 
 		throttled_usec = cfs_b->throttled_time;
 		do_div(throttled_usec, NSEC_PER_USEC);
+		throttled_self_usec = throttled_time_self(tg);
+		do_div(throttled_self_usec, NSEC_PER_USEC);
 		burst_usec = cfs_b->burst_time;
 		do_div(burst_usec, NSEC_PER_USEC);
 
 		seq_printf(sf, "nr_periods %d\n"
 			   "nr_throttled %d\n"
 			   "throttled_usec %llu\n"
+			   "throttled_self_usec %llu\n"
 			   "nr_bursts %d\n"
 			   "burst_usec %llu\n",
 			   cfs_b->nr_periods, cfs_b->nr_throttled,
-			   throttled_usec, cfs_b->nr_burst, burst_usec);
+			   throttled_usec, throttled_self_usec, cfs_b->nr_burst,
+			   burst_usec);
 	}
 #endif
 	return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 85c2c0c3cab6..fd348b9421c1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4822,6 +4822,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		} else {
 			if (!cfs_rq->throttled_clock)
 				cfs_rq->throttled_clock = rq_clock(rq);
+			if (!cfs_rq->throttled_clock_self)
+				cfs_rq->throttled_clock_self = rq_clock(rq);
 		}
 	}
 }
@@ -5327,6 +5329,17 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		list_add_leaf_cfs_rq(cfs_rq);
 	}
 
+	if (cfs_rq->throttled_clock_self) {
+		u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+
+		cfs_rq->throttled_clock_self = 0;
+
+		if (SCHED_WARN_ON((s64)delta < 0))
+			delta = 0;
+
+		cfs_rq->throttled_clock_self_time += delta;
+	}
+
 	return 0;
 }
 
@@ -5342,6 +5355,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 	}
 	cfs_rq->throttle_count++;
 
+	SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+	if (cfs_rq->nr_running)
+		cfs_rq->throttled_clock_self = rq_clock(rq);
+
 	return 0;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 060616944d7a..ddc48b2f1fd0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -642,6 +642,8 @@ struct cfs_rq {
 	u64			throttled_clock;
 	u64			throttled_clock_pelt;
 	u64			throttled_clock_pelt_time;
+	u64			throttled_clock_self;
+	u64			throttled_clock_self_time;
 	int			throttled;
 	int			throttle_count;
 	struct list_head	throttled_list;
--
2.40.1.606.ga4b1b128d6-goog


2023-05-18 08:30:32

by kernel test robot

Subject: Re: [PATCH 1/2] sched: don't account throttle time for empty groups

Hi Josh,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master tip/auto-latest linus/master v6.4-rc2 next-20230518]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
base: tip/sched/core
patch link: https://lore.kernel.org/r/20230518013414.3053254-1-joshdon%40google.com
patch subject: [PATCH 1/2] sched: don't account throttle time for empty groups
config: arm-randconfig-r025-20230517
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project b0fb98227c90adf2536c9ad644a74d5e92961111)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
# https://github.com/intel-lab-lkp/linux/commit/9414859be598a05f1bb078f8bf83e132976384ea
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
git checkout 9414859be598a05f1bb078f8bf83e132976384ea
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash kernel/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> kernel/sched/fair.c:4880:17: error: no member named 'throttled_clock' in 'struct cfs_rq'
if (!cfs_rq->throttled_clock)
~~~~~~ ^
kernel/sched/fair.c:4881:13: error: no member named 'throttled_clock' in 'struct cfs_rq'
cfs_rq->throttled_clock = rq_clock(rq);
~~~~~~ ^
kernel/sched/fair.c:6181:6: warning: no previous prototype for function 'init_cfs_bandwidth' [-Wmissing-prototypes]
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
^
kernel/sched/fair.c:6181:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
^
static
kernel/sched/fair.c:12617:6: warning: no previous prototype for function 'free_fair_sched_group' [-Wmissing-prototypes]
void free_fair_sched_group(struct task_group *tg) { }
^
kernel/sched/fair.c:12617:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void free_fair_sched_group(struct task_group *tg) { }
^
static
kernel/sched/fair.c:12619:5: warning: no previous prototype for function 'alloc_fair_sched_group' [-Wmissing-prototypes]
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
^
kernel/sched/fair.c:12619:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
^
static
kernel/sched/fair.c:12624:6: warning: no previous prototype for function 'online_fair_sched_group' [-Wmissing-prototypes]
void online_fair_sched_group(struct task_group *tg) { }
^
kernel/sched/fair.c:12624:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void online_fair_sched_group(struct task_group *tg) { }
^
static
kernel/sched/fair.c:12626:6: warning: no previous prototype for function 'unregister_fair_sched_group' [-Wmissing-prototypes]
void unregister_fair_sched_group(struct task_group *tg) { }
^
kernel/sched/fair.c:12626:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
void unregister_fair_sched_group(struct task_group *tg) { }
^
static
5 warnings and 2 errors generated.
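
The errors are consistent with cfs_rq::throttled_clock being declared in
sched.h only under CONFIG_CFS_BANDWIDTH, while enqueue_entity() is built for
every config. A minimal sketch of one possible fix (the helper name is
hypothetical, not taken from this thread):

	#ifdef CONFIG_CFS_BANDWIDTH
	/* Start the throttle clock once; compiles away without bandwidth control. */
	static void record_throttled_clock(struct cfs_rq *cfs_rq)
	{
		struct rq *rq = rq_of(cfs_rq);

		if (!cfs_rq->throttled_clock)
			cfs_rq->throttled_clock = rq_clock(rq);
	}
	#else
	static inline void record_throttled_clock(struct cfs_rq *cfs_rq) {}
	#endif

enqueue_entity() would then call record_throttled_clock(cfs_rq) in the
throttled_hierarchy() branch instead of touching the field directly.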


vim +4880 kernel/sched/fair.c

4792
4793 /*
4794 * MIGRATION
4795 *
4796 * dequeue
4797 * update_curr()
4798 * update_min_vruntime()
4799 * vruntime -= min_vruntime
4800 *
4801 * enqueue
4802 * update_curr()
4803 * update_min_vruntime()
4804 * vruntime += min_vruntime
4805 *
4806 * this way the vruntime transition between RQs is done when both
4807 * min_vruntime are up-to-date.
4808 *
4809 * WAKEUP (remote)
4810 *
4811 * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
4812 * vruntime -= min_vruntime
4813 *
4814 * enqueue
4815 * update_curr()
4816 * update_min_vruntime()
4817 * vruntime += min_vruntime
4818 *
4819 * this way we don't have the most up-to-date min_vruntime on the originating
4820 * CPU and an up-to-date min_vruntime on the destination CPU.
4821 */
4822
4823 static void
4824 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
4825 {
4826 bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
4827 bool curr = cfs_rq->curr == se;
4828 struct rq *rq = rq_of(cfs_rq);
4829
4830 /*
4831 * If we're the current task, we must renormalise before calling
4832 * update_curr().
4833 */
4834 if (renorm && curr)
4835 se->vruntime += cfs_rq->min_vruntime;
4836
4837 update_curr(cfs_rq);
4838
4839 /*
4840 * Otherwise, renormalise after, such that we're placed at the current
4841 * moment in time, instead of some random moment in the past. Being
4842 * placed in the past could significantly boost this task to the
4843 * fairness detriment of existing tasks.
4844 */
4845 if (renorm && !curr)
4846 se->vruntime += cfs_rq->min_vruntime;
4847
4848 /*
4849 * When enqueuing a sched_entity, we must:
4850 * - Update loads to have both entity and cfs_rq synced with now.
4851 * - For group_entity, update its runnable_weight to reflect the new
4852 * h_nr_running of its group cfs_rq.
4853 * - For group_entity, update its weight to reflect the new share of
4854 * its group cfs_rq
4855 * - Add its new weight to cfs_rq->load.weight
4856 */
4857 update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
4858 se_update_runnable(se);
4859 update_cfs_group(se);
4860 account_entity_enqueue(cfs_rq, se);
4861
4862 if (flags & ENQUEUE_WAKEUP)
4863 place_entity(cfs_rq, se, 0);
4864 /* Entity has migrated, no longer consider this task hot */
4865 if (flags & ENQUEUE_MIGRATED)
4866 se->exec_start = 0;
4867
4868 check_schedstat_required();
4869 update_stats_enqueue_fair(cfs_rq, se, flags);
4870 check_spread(cfs_rq, se);
4871 if (!curr)
4872 __enqueue_entity(cfs_rq, se);
4873 se->on_rq = 1;
4874
4875 if (cfs_rq->nr_running == 1) {
4876 check_enqueue_throttle(cfs_rq);
4877 if (!throttled_hierarchy(cfs_rq)) {
4878 list_add_leaf_cfs_rq(cfs_rq);
4879 } else {
> 4880 if (!cfs_rq->throttled_clock)
4881 cfs_rq->throttled_clock = rq_clock(rq);
4882 }
4883 }
4884 }
4885

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki



2023-05-18 12:24:18

by kernel test robot

Subject: Re: [PATCH 2/2] sched: add throttled time stat for throttled children

Hi Josh,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master tip/auto-latest linus/master v6.4-rc2 next-20230518]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
base: tip/sched/core
patch link: https://lore.kernel.org/r/20230518013414.3053254-2-joshdon%40google.com
patch subject: [PATCH 2/2] sched: add throttled time stat for throttled children
config: mips-decstation_defconfig
compiler: mipsel-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/557f81b701a0759daea26b798b7c00ac5aa027ae
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Josh-Don/sched-add-throttled-time-stat-for-throttled-children/20230518-095541
git checkout 557f81b701a0759daea26b798b7c00ac5aa027ae
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

kernel/sched/fair.c: In function 'enqueue_entity':
kernel/sched/fair.c:4880:36: error: 'struct cfs_rq' has no member named 'throttled_clock'
4880 | if (!cfs_rq->throttled_clock)
| ^~
kernel/sched/fair.c:4881:39: error: 'struct cfs_rq' has no member named 'throttled_clock'
4881 | cfs_rq->throttled_clock = rq_clock(rq);
| ^~
>> kernel/sched/fair.c:4882:36: error: 'struct cfs_rq' has no member named 'throttled_clock_self'
4882 | if (!cfs_rq->throttled_clock_self)
| ^~
kernel/sched/fair.c:4883:39: error: 'struct cfs_rq' has no member named 'throttled_clock_self'
4883 | cfs_rq->throttled_clock_self = rq_clock(rq);
| ^~
kernel/sched/fair.c: At top level:
kernel/sched/fair.c:6198:6: warning: no previous prototype for 'init_cfs_bandwidth' [-Wmissing-prototypes]
6198 | void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
| ^~~~~~~~~~~~~~~~~~
kernel/sched/fair.c:12634:6: warning: no previous prototype for 'free_fair_sched_group' [-Wmissing-prototypes]
12634 | void free_fair_sched_group(struct task_group *tg) { }
| ^~~~~~~~~~~~~~~~~~~~~
kernel/sched/fair.c:12636:5: warning: no previous prototype for 'alloc_fair_sched_group' [-Wmissing-prototypes]
12636 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
| ^~~~~~~~~~~~~~~~~~~~~~
kernel/sched/fair.c:12641:6: warning: no previous prototype for 'online_fair_sched_group' [-Wmissing-prototypes]
12641 | void online_fair_sched_group(struct task_group *tg) { }
| ^~~~~~~~~~~~~~~~~~~~~~~
kernel/sched/fair.c:12643:6: warning: no previous prototype for 'unregister_fair_sched_group' [-Wmissing-prototypes]
12643 | void unregister_fair_sched_group(struct task_group *tg) { }
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
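
As with the report against patch 1, these errors point at the new cfs_rq
fields existing only under CONFIG_CFS_BANDWIDTH while enqueue_entity() builds
for every config; a combined guard in the enqueue path would be one option
(illustrative sketch only):

	#ifdef CONFIG_CFS_BANDWIDTH
			/* Record both the parent-visible and the self throttle clocks. */
			if (!cfs_rq->throttled_clock)
				cfs_rq->throttled_clock = rq_clock(rq);
			if (!cfs_rq->throttled_clock_self)
				cfs_rq->throttled_clock_self = rq_clock(rq);
	#endif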


vim +4882 kernel/sched/fair.c

4792
4793 /*
4794 * MIGRATION
4795 *
4796 * dequeue
4797 * update_curr()
4798 * update_min_vruntime()
4799 * vruntime -= min_vruntime
4800 *
4801 * enqueue
4802 * update_curr()
4803 * update_min_vruntime()
4804 * vruntime += min_vruntime
4805 *
4806 * this way the vruntime transition between RQs is done when both
4807 * min_vruntime are up-to-date.
4808 *
4809 * WAKEUP (remote)
4810 *
4811 * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
4812 * vruntime -= min_vruntime
4813 *
4814 * enqueue
4815 * update_curr()
4816 * update_min_vruntime()
4817 * vruntime += min_vruntime
4818 *
4819 * this way we don't have the most up-to-date min_vruntime on the originating
4820 * CPU and an up-to-date min_vruntime on the destination CPU.
4821 */
4822
4823 static void
4824 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
4825 {
4826 bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
4827 bool curr = cfs_rq->curr == se;
4828 struct rq *rq = rq_of(cfs_rq);
4829
4830 /*
4831 * If we're the current task, we must renormalise before calling
4832 * update_curr().
4833 */
4834 if (renorm && curr)
4835 se->vruntime += cfs_rq->min_vruntime;
4836
4837 update_curr(cfs_rq);
4838
4839 /*
4840 * Otherwise, renormalise after, such that we're placed at the current
4841 * moment in time, instead of some random moment in the past. Being
4842 * placed in the past could significantly boost this task to the
4843 * fairness detriment of existing tasks.
4844 */
4845 if (renorm && !curr)
4846 se->vruntime += cfs_rq->min_vruntime;
4847
4848 /*
4849 * When enqueuing a sched_entity, we must:
4850 * - Update loads to have both entity and cfs_rq synced with now.
4851 * - For group_entity, update its runnable_weight to reflect the new
4852 * h_nr_running of its group cfs_rq.
4853 * - For group_entity, update its weight to reflect the new share of
4854 * its group cfs_rq
4855 * - Add its new weight to cfs_rq->load.weight
4856 */
4857 update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
4858 se_update_runnable(se);
4859 update_cfs_group(se);
4860 account_entity_enqueue(cfs_rq, se);
4861
4862 if (flags & ENQUEUE_WAKEUP)
4863 place_entity(cfs_rq, se, 0);
4864 /* Entity has migrated, no longer consider this task hot */
4865 if (flags & ENQUEUE_MIGRATED)
4866 se->exec_start = 0;
4867
4868 check_schedstat_required();
4869 update_stats_enqueue_fair(cfs_rq, se, flags);
4870 check_spread(cfs_rq, se);
4871 if (!curr)
4872 __enqueue_entity(cfs_rq, se);
4873 se->on_rq = 1;
4874
4875 if (cfs_rq->nr_running == 1) {
4876 check_enqueue_throttle(cfs_rq);
4877 if (!throttled_hierarchy(cfs_rq)) {
4878 list_add_leaf_cfs_rq(cfs_rq);
4879 } else {
4880 if (!cfs_rq->throttled_clock)
4881 cfs_rq->throttled_clock = rq_clock(rq);
> 4882 if (!cfs_rq->throttled_clock_self)
4883 cfs_rq->throttled_clock_self = rq_clock(rq);
4884 }
4885 }
4886 }
4887

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki



2023-05-23 21:50:50

by Tejun Heo

Subject: Re: [PATCH 2/2] sched: add throttled time stat for throttled children

Hello,

(cc'ing Johannes)

On Wed, May 17, 2023 at 06:34:14PM -0700, Josh Don wrote:
> We currently export the total throttled time for cgroups that are given
> a bandwidth limit. This patch extends that accounting to include the
> total time that each child cgroup has been throttled.
>
> This is useful for understanding the degree to which children have been
> affected by the throttling control. Children that are not runnable
> during the entire throttled period, for example, will not show any
> self-throttling time for that period.
...
> @@ -11204,20 +11217,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
>  	{
>  		struct task_group *tg = css_tg(css);
>  		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
> -		u64 throttled_usec, burst_usec;
> +		u64 throttled_usec, burst_usec, throttled_self_usec;
>
>  		throttled_usec = cfs_b->throttled_time;
>  		do_div(throttled_usec, NSEC_PER_USEC);
> +		throttled_self_usec = throttled_time_self(tg);
> +		do_div(throttled_self_usec, NSEC_PER_USEC);
>  		burst_usec = cfs_b->burst_time;
>  		do_div(burst_usec, NSEC_PER_USEC);
>
>  		seq_printf(sf, "nr_periods %d\n"
>  			   "nr_throttled %d\n"
>  			   "throttled_usec %llu\n"
> +			   "throttled_self_usec %llu\n"

This is fine in principle, but I think it'd be better to keep it consistent
with how non-hierarchical events are reported in memory.events.local. That is,
can we please add cpu.stat.local instead of adding the _self key to cpu.stat?
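
For example, an illustrative cpu.stat.local mirroring memory.events.local
would carry just the non-hierarchical value (key name and value hypothetical):

	throttled_usec 150000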

Thanks.

--
tejun

2023-05-25 00:50:35

by Josh Don

Subject: Re: [PATCH 2/2] sched: add throttled time stat for throttled children

Hey Tejun,

On Tue, May 23, 2023 at 2:44 PM Tejun Heo <[email protected]> wrote:
>
> Hello,
>
> (cc'ing Johannes)
>
> On Wed, May 17, 2023 at 06:34:14PM -0700, Josh Don wrote:
> > We currently export the total throttled time for cgroups that are given
> > a bandwidth limit. This patch extends that accounting to include the
> > total time that each child cgroup has been throttled.
> >
> > This is useful for understanding the degree to which children have been
> > affected by the throttling control. Children that are not runnable
> > during the entire throttled period, for example, will not show any
> > self-throttling time for that period.
> ...
> > @@ -11204,20 +11217,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
> >  	{
> >  		struct task_group *tg = css_tg(css);
> >  		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
> > -		u64 throttled_usec, burst_usec;
> > +		u64 throttled_usec, burst_usec, throttled_self_usec;
> >
> >  		throttled_usec = cfs_b->throttled_time;
> >  		do_div(throttled_usec, NSEC_PER_USEC);
> > +		throttled_self_usec = throttled_time_self(tg);
> > +		do_div(throttled_self_usec, NSEC_PER_USEC);
> >  		burst_usec = cfs_b->burst_time;
> >  		do_div(burst_usec, NSEC_PER_USEC);
> >
> >  		seq_printf(sf, "nr_periods %d\n"
> >  			   "nr_throttled %d\n"
> >  			   "throttled_usec %llu\n"
> > +			   "throttled_self_usec %llu\n"
>
> This is fine in principle, but I think it'd be better to keep it consistent
> with how non-hierarchical events are reported in memory.events.local. That is,
> can we please add cpu.stat.local instead of adding the _self key to cpu.stat?

It seemed unfortunate to split it out into a separate interface, since
there wouldn't be much to put there, but I don't have a strong
objection to this. I'll go ahead and make that change for v2.

Best,
Josh