During stress testing on a 2-socket Sapphire Rapids system, Walter Mack
noticed anomalies where tasks were starved for more than 70 seconds
before getting scheduled.
The stress test scenario is an extreme case where about 50 threads are
started per CPU, and each thread then continuously hops from one core
to another.
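For reference, below is a minimal user-space sketch of this kind of
hopping workload (build with gcc -pthread). This is not Walter's actual
test; the thread count, spin length, and affinity pattern are invented
for illustration.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Each thread pins itself to one CPU, burns a little time there, then
 * re-pins to the next CPU, generating a continuous stream of task
 * migrations. */
static void *hopper(void *arg)
{
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu = (int)((long)arg % ncpus);

	for (;;) {
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(cpu, &set);
		pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

		for (volatile long i = 0; i < 1000000; i++)
			;	/* spin briefly on this core */

		cpu = (cpu + 1) % ncpus;
	}
	return NULL;
}

int main(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	long nthreads = 50 * ncpus;	/* roughly 50 threads per CPU */

	for (long i = 0; i < nthreads; i++) {
		pthread_t t;

		if (pthread_create(&t, NULL, hopper, (void *)i))
			exit(1);
	}
	pause();	/* let the hoppers run until interrupted */
	return 0;
}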
We discussed this issue with Peter Zijlstra, who narrowed things down
to the vruntime of a migrated task being too far out of sync with the
tasks on the target run queue. Peter suggested the following two
patches, which fixed the starvation anomalies that Walter saw.
Chen Yu also ran the patches through our 0-day test infrastructure to
check for regressions. The performance changes of note are below:

5.15 Throughput   Change    5.15+patches   Test
4634070           -7.5%     4285823        stress-ng.sigsuspend.ops_per_sec
29934             +37.0%    41006          aim7.jobs-per-min
Stress-ng sigsuspend is the worst affected, but most workloads are not
negatively impacted. In fact, we saw a 37% improvement in AIM7 with
these patches.
Tim
Peter Zijlstra (1):
sched/fair: Don't rely on ->exec_start for migration
Peter Zijlstra (Intel) (1):
sched/fair: Simple runqueue order on migrate
include/linux/sched.h | 1 +
kernel/sched/fair.c | 37 +++++++++++++++++++++++++++++++++----
kernel/sched/features.h | 2 ++
3 files changed, 36 insertions(+), 4 deletions(-)
--
2.32.0
From: Peter Zijlstra <[email protected]>
From: Peter Zijlstra (Intel) <[email protected]>
Currently migrate_task_rq_fair() (ab)uses se->exec_start to make
task_hot() fail. In order to preserve ->exec_start, add a ->migrated
flag to sched_entity.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
---
include/linux/sched.h | 1 +
kernel/sched/fair.c | 6 +++++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248..0edf16b4d40a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -541,6 +541,7 @@ struct sched_entity {
struct rb_node run_node;
struct list_head group_node;
unsigned int on_rq;
+ unsigned int migrated;
u64 exec_start;
u64 sum_exec_runtime;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5146163bfabb..2498e97804fd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1004,6 +1004,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
/*
* We are starting a new run period:
*/
+ se->migrated = 0;
se->exec_start = rq_clock_task(rq_of(cfs_rq));
}
@@ -6979,7 +6980,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
p->se.avg.last_update_time = 0;
/* We have migrated, no longer consider this task hot */
- p->se.exec_start = 0;
+ p->se.migrated = 1;
update_scan_period(p, new_cpu);
}
@@ -7665,6 +7666,9 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
if (sysctl_sched_migration_cost == 0)
return 0;
+ if (p->se.migrated)
+ return 0;
+
delta = rq_clock_task(env->src_rq) - p->se.exec_start;
return delta < (s64)sysctl_sched_migration_cost;
--
2.32.0
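To make the (ab)use concrete, here is a small user-space sketch (not
kernel code) contrasting the old exec_start trick with the new flag.
The clock values are invented; 500000 ns is the kernel's default
sysctl_sched_migration_cost.

#include <stdio.h>
#include <stdbool.h>

typedef long long s64;
static const s64 sysctl_sched_migration_cost = 500000;	/* 0.5 ms */

struct se { s64 exec_start; unsigned int migrated; };

/* Old trick: exec_start = 0 makes delta enormous, so the task is never
 * "hot" -- but the real start-of-run timestamp is lost. */
static bool task_hot_old(const struct se *se, s64 now)
{
	return (now - se->exec_start) < sysctl_sched_migration_cost;
}

/* New scheme: a dedicated flag defeats the check, exec_start is kept. */
static bool task_hot_new(const struct se *se, s64 now)
{
	if (se->migrated)
		return false;
	return (now - se->exec_start) < sysctl_sched_migration_cost;
}

int main(void)
{
	s64 now = 1000000000;			/* invented rq clock */
	struct se a = { .exec_start = 0 };	/* old trick */
	struct se b = { .exec_start = now - 100000, .migrated = 1 };

	printf("old trick: hot=%d, exec_start lost\n", task_hot_old(&a, now));
	printf("new flag : hot=%d, exec_start kept\n", task_hot_new(&b, now));
	return 0;
}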
From: "Peter Zijlstra (Intel)" <[email protected]>
From: Peter Zijlstra (Intel) <[email protected]>
There's a number of problems with SMP migration of fair tasks, but
basically it boils down to a task not receiving equal service on each
runqueue (consider the trivial 3 tasks 2 cpus infeasible weight
scenario).

Fully solving that with vruntime placement is 'hard', not least
because a task might be very under-serviced on a busy runqueue and
would need to be placed so far left on the new runqueue that it would
significantly impact latency on the existing tasks.

Instead, do minimal / basic placement: when moving to a less
busy queue, place at the front of the queue to receive time sooner.
When moving to a busier queue, place at the end of the queue to
receive time later.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Tested-by: Chen Yu <[email protected]>
Tested-by: Walter Mack <[email protected]>
---
kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++----
kernel/sched/features.h | 2 ++
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2498e97804fd..c5d2cb3a8f42 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4223,6 +4223,27 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
se->vruntime = max_vruntime(se->vruntime, vruntime);
}
+static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (!sched_feat(PLACE_MIGRATE))
+ return;
+
+ if (cfs_rq->nr_running < se->migrated) {
+ /*
+ * Migrated to a shorter runqueue, go first because
+ * we were under-served on the old runqueue.
+ */
+ se->vruntime = cfs_rq->min_vruntime;
+ return;
+ }
+
+ /*
+ * Migrated to a longer runqueue, go last because
+ * we got over-served on the old runqueue.
+ */
+ se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
+}
+
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
static inline bool cfs_bandwidth_used(void);
@@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
+ else if (se->migrated)
+ place_entity_migrate(cfs_rq, se);
check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -6930,6 +6953,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
*/
static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
+ struct sched_entity *se = &p->se;
/*
* As blocked tasks retain absolute vruntime the migration needs to
* deal with this by subtracting the old and adding the new
@@ -6962,7 +6986,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
* rq->lock and can modify state directly.
*/
lockdep_assert_rq_held(task_rq(p));
- detach_entity_cfs_rq(&p->se);
+ detach_entity_cfs_rq(se);
} else {
/*
@@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
* wakee task is less decayed, but giving the wakee more load
* sounds not bad.
*/
- remove_entity_load_avg(&p->se);
+ remove_entity_load_avg(se);
}
/* Tell new CPU we are migrated */
- p->se.avg.last_update_time = 0;
+ se->avg.last_update_time = 0;
/* We have migrated, no longer consider this task hot */
- p->se.migrated = 1;
+ for_each_sched_entity(se)
+ se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
update_scan_period(p, new_cpu);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbcd9c..681c84fd062c 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -100,3 +100,5 @@ SCHED_FEAT(LATENCY_WARN, false)
SCHED_FEAT(ALT_PERIOD, true)
SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(PLACE_MIGRATE, true)
--
2.32.0
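A note on the encoding: patch 1's boolean ->migrated is reused here to
carry the source runqueue's nr_running (plus one if the task was not on
the runqueue), so place_entity_migrate() can compare queue lengths at
enqueue time. Below is a minimal user-space sketch of that decision;
the function names mirror the patch, but the vruntime numbers and the
vslice() stand-in are invented.

#include <stdio.h>

struct runqueue {
	unsigned int nr_running;
	unsigned long long min_vruntime;
};

struct entity {
	unsigned long long vruntime;
	unsigned int migrated;	/* src nr_running (+1 if !on_rq) */
};

/* Stand-in for sched_vslice(): one ideal slice worth of vruntime. */
static unsigned long long vslice(const struct runqueue *rq)
{
	return 4000000ULL / (rq->nr_running + 1);
}

static void place_entity_migrate(struct runqueue *rq, struct entity *se)
{
	if (rq->nr_running < se->migrated) {
		/* shorter queue: go first, we were under-served */
		se->vruntime = rq->min_vruntime;
		return;
	}
	/* longer (or equal) queue: go last, we were over-served */
	se->vruntime = rq->min_vruntime + vslice(rq);
}

int main(void)
{
	struct runqueue dst = { .nr_running = 2, .min_vruntime = 1000000 };
	struct entity se = { .migrated = 5 };	/* came from a 5-deep queue */

	place_entity_migrate(&dst, &se);
	printf("5 -> 2 tasks: vruntime=%llu (front)\n", se.vruntime);

	se.migrated = 1;			/* came from a near-idle queue */
	place_entity_migrate(&dst, &se);
	printf("1 -> 2 tasks: vruntime=%llu (one slice behind)\n", se.vruntime);
	return 0;
}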
On Sat, 26 Mar 2022 at 00:52, Tim Chen <[email protected]> wrote:
>
> From: "Peter Zijlstra (Intel)" <[email protected]>
>
> From: Peter Zijlstra (Intel) <[email protected]>
>
> There's a number of problems with SMP migration of fair tasks, but
> basically it boils down to a task not receiving equal service on each
> runqueue (consider the trivial 3 tasks 2 cpus infeasible weight
> scenario).
>
> Fully solving that with vruntime placement is 'hard', not least
> because a task might be very under-serviced on a busy runqueue and
> would need to be placed so far left on the new runqueue that it would
> significantly impact latency on the existing tasks.
>
> Instead, do minimal / basic placement: when moving to a less
> busy queue place at the front of the queue to receive time sooner.
> When moving to a busier queue, place at the end of the queue to
> receive time later.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> Tested-by: Chen Yu <[email protected]>
> Tested-by: Walter Mack <[email protected]>
> ---
> kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++----
> kernel/sched/features.h | 2 ++
> 2 files changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2498e97804fd..c5d2cb3a8f42 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4223,6 +4223,27 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
> se->vruntime = max_vruntime(se->vruntime, vruntime);
> }
>
> +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> + if (!sched_feat(PLACE_MIGRATE))
> + return;
> +
> + if (cfs_rq->nr_running < se->migrated) {
> + /*
> + * Migrated to a shorter runqueue, go first because
> + * we were under-served on the old runqueue.
> + */
> + se->vruntime = cfs_rq->min_vruntime;
> + return;
> + }
> +
> + /*
> + * Migrated to a longer runqueue, go last because
> + * we got over-served on the old runqueue.
> + */
> + se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> +}
> +
> static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
>
> static inline bool cfs_bandwidth_used(void);
> @@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>
> if (flags & ENQUEUE_WAKEUP)
> place_entity(cfs_rq, se, 0);
> + else if (se->migrated)
> + place_entity_migrate(cfs_rq, se);
>
> check_schedstat_required();
> update_stats_enqueue_fair(cfs_rq, se, flags);
> @@ -6930,6 +6953,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
> */
> static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> {
> + struct sched_entity *se = &p->se;
> /*
> * As blocked tasks retain absolute vruntime the migration needs to
> * deal with this by subtracting the old and adding the new
> @@ -6962,7 +6986,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> * rq->lock and can modify state directly.
> */
> lockdep_assert_rq_held(task_rq(p));
> - detach_entity_cfs_rq(&p->se);
> + detach_entity_cfs_rq(se);
>
> } else {
> /*
> @@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> * wakee task is less decayed, but giving the wakee more load
> * sounds not bad.
> */
> - remove_entity_load_avg(&p->se);
> + remove_entity_load_avg(se);
> }
>
> /* Tell new CPU we are migrated */
> - p->se.avg.last_update_time = 0;
> + se->avg.last_update_time = 0;
>
> /* We have migrated, no longer consider this task hot */
> - p->se.migrated = 1;
> + for_each_sched_entity(se)
> + se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
Why do we need to loop on se? Isn't p->se enough?
>
> update_scan_period(p, new_cpu);
> }
> diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> index 1cf435bbcd9c..681c84fd062c 100644
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -100,3 +100,5 @@ SCHED_FEAT(LATENCY_WARN, false)
>
> SCHED_FEAT(ALT_PERIOD, true)
> SCHED_FEAT(BASE_SLICE, true)
> +
> +SCHED_FEAT(PLACE_MIGRATE, true)
> --
> 2.32.0
>
On 3/26/22 6:54 AM, Tim Chen wrote:
> From: "Peter Zijlstra (Intel)" <[email protected]>
>
> From: Peter Zijlstra (Intel) <[email protected]>
>
> There's a number of problems with SMP migration of fair tasks, but
> basically it boils down to a task not receiving equal service on each
> runqueue (consider the trivial 3 tasks 2 cpus infeasible weight
> scenario).
>
> Fully solving that with vruntime placement is 'hard', not least
> because a task might be very under-serviced on a busy runqueue and
> would need to be placed so far left on the new runqueue that it would
> significantly impact latency on the existing tasks.
>
> Instead, do minimal / basic placement: when moving to a less
> busy queue place at the front of the queue to receive time sooner.
> When moving to a busier queue, place at the end of the queue to
> receive time later.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> Tested-by: Chen Yu <[email protected]>
> Tested-by: Walter Mack <[email protected]>
> ---
> kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++----
> kernel/sched/features.h | 2 ++
> 2 files changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2498e97804fd..c5d2cb3a8f42 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4223,6 +4223,27 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
> se->vruntime = max_vruntime(se->vruntime, vruntime);
> }
>
> +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> + if (!sched_feat(PLACE_MIGRATE))
> + return;
> +
> + if (cfs_rq->nr_running < se->migrated) {
> + /*
> + * Migrated to a shorter runqueue, go first because
> + * we were under-served on the old runqueue.
> + */
> + se->vruntime = cfs_rq->min_vruntime;
> + return;
> + }
> +
> + /*
> + * Migrated to a longer runqueue, go last because
> + * we got over-served on the old runqueue.
> + */
> + se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> +}
Should se->migrated be cleared after place_entity_migrate?
> +
> static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
>
> static inline bool cfs_bandwidth_used(void);
> @@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>
> if (flags & ENQUEUE_WAKEUP)
> place_entity(cfs_rq, se, 0);
> + else if (se->migrated)
> + place_entity_migrate(cfs_rq, se);
>
> check_schedstat_required();
> update_stats_enqueue_fair(cfs_rq, se, flags);
> @@ -6930,6 +6953,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
> */
> static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> {
> + struct sched_entity *se = &p->se;
> /*
> * As blocked tasks retain absolute vruntime the migration needs to
> * deal with this by subtracting the old and adding the new
> @@ -6962,7 +6986,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> * rq->lock and can modify state directly.
> */
> lockdep_assert_rq_held(task_rq(p));
> - detach_entity_cfs_rq(&p->se);
> + detach_entity_cfs_rq(se);
>
> } else {
> /*
> @@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> * wakee task is less decayed, but giving the wakee more load
> * sounds not bad.
> */
> - remove_entity_load_avg(&p->se);
> + remove_entity_load_avg(se);
> }
>
> /* Tell new CPU we are migrated */
> - p->se.avg.last_update_time = 0;
> + se->avg.last_update_time = 0;
>
> /* We have migrated, no longer consider this task hot */
> - p->se.migrated = 1;
> + for_each_sched_entity(se)
> + se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
>
> update_scan_period(p, new_cpu);
> }
> diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> index 1cf435bbcd9c..681c84fd062c 100644
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -100,3 +100,5 @@ SCHED_FEAT(LATENCY_WARN, false)
>
> SCHED_FEAT(ALT_PERIOD, true)
> SCHED_FEAT(BASE_SLICE, true)
> +
> +SCHED_FEAT(PLACE_MIGRATE, true)
On Tue, 2022-03-29 at 18:09 +0800, Abel Wu wrote:
> >
>
> >
> > +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> > +{
> > + if (!sched_feat(PLACE_MIGRATE))
> > + return;
> > +
> > + if (cfs_rq->nr_running < se->migrated) {
> > + /*
> > + * Migrated to a shorter runqueue, go first because
> > + * we were under-served on the old runqueue.
> > + */
> > + se->vruntime = cfs_rq->min_vruntime;
> > + return;
> > + }
> > +
> > + /*
> > + * Migrated to a longer runqueue, go last because
> > + * we got over-served on the old runqueue.
> > + */
> > + se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> > +}
>
> Should se->migrated be cleared after place_entity_migrate?
>
It is cleared in update_stats_curr_start() before we actually run the task.
If the task gets migrated again before we run it, se->migrated will simply
be updated again in migrate_task_rq_fair(). So there is no need to clear it
here.
Tim
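To spell that lifecycle out, here is a small user-space sketch assembled
from the two patches above; the function names mirror the kernel, but the
driver in main() and the queue lengths are invented.

#include <stdio.h>

struct sched_entity { unsigned int migrated; unsigned int on_rq; };

static void migrate_task_rq_fair(struct sched_entity *se,
				 unsigned int src_nr_running)
{
	/* overwrites any previous value, even if the task never ran */
	se->migrated = src_nr_running + !se->on_rq;
}

static void place_entity_migrate(struct sched_entity *se)
{
	printf("placed using migrated=%u (no clear needed here)\n",
	       se->migrated);
}

static void update_stats_curr_start(struct sched_entity *se)
{
	se->migrated = 0;	/* cleared when the task starts a run period */
}

int main(void)
{
	struct sched_entity se = { .on_rq = 1 };

	migrate_task_rq_fair(&se, 5);	/* first migration: migrated = 5 */
	migrate_task_rq_fair(&se, 2);	/* migrated again before running: 2 */
	place_entity_migrate(&se);	/* placement sees the latest value */
	update_stats_curr_start(&se);	/* task runs: flag cleared */
	printf("after running: migrated=%u\n", se.migrated);
	return 0;
}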
Greetings,
FYI, we noticed a -25.9% regression of hackbench.throughput due to commit:
commit: ddb3b1126fe93e8d6e05960048840e6d6edbb7b6 ("[PATCH 2/2] sched/fair: Simple runqueue order on migrate")
url: https://github.com/intel-lab-lkp/linux/commits/Tim-Chen/sched-fair-Fix-starvation-caused-by-task-migration/20220326-075316
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git ffea9fb319360b9ead8befac6bb2db2b54fd53e6
patch link: https://lore.kernel.org/lkml/f59f1dfeec4692e7a373cc810168912a2d2f8f3c.1648228023.git.tim.c.chen@linux.intel.com
in testcase: hackbench
on test machine: 16 threads 1 socket Intel(R) Xeon(R) E-2278G CPU @ 3.40GHz with 32G memory
with following parameters:
nr_threads: 100%
iterations: 4
mode: threads
ipc: socket
cpufreq_governor: performance
ucode: 0xec
test-description: Hackbench is both a benchmark and a stress test for the Linux kernel scheduler.
test-url: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/sched/cfs-scheduler/hackbench.c
If you fix the issue, kindly add the following tag
Reported-by: kernel test robot <[email protected]>
Details are as below:
-------------------------------------------------------------------------------------------------->
To reproduce:
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file
# if you come across any failure that blocks the test,
# please remove ~/.lkp and the /lkp dir to run from a clean state.
=========================================================================================
compiler/cpufreq_governor/ipc/iterations/kconfig/mode/nr_threads/rootfs/tbox_group/testcase/ucode:
gcc-9/performance/socket/4/x86_64-rhel-8.3/threads/100%/debian-10.4-x86_64-20200603.cgz/lkp-cfl-e1/hackbench/0xec
commit:
d9ad181b93 ("sched/fair: Don't rely on ->exec_start for migration")
ddb3b1126f ("sched/fair: Simple runqueue order on migrate")
d9ad181b93f83b36 ddb3b1126fe93e8d6e059600488
---------------- ---------------------------
%stddev %change %stddev
\ | \
92566 -25.9% 68598 ± 2% hackbench.throughput
85.50 +40.8% 120.35 ± 2% hackbench.time.elapsed_time
85.50 +40.8% 120.35 ± 2% hackbench.time.elapsed_time.max
9674391 ± 2% +466.2% 54780205 ± 2% hackbench.time.involuntary_context_switches
1240 +42.6% 1768 ± 3% hackbench.time.system_time
89.58 +20.4% 107.85 hackbench.time.user_time
36626407 +270.2% 1.356e+08 ± 2% hackbench.time.voluntary_context_switches
2500490 ± 3% +78.2% 4456808 ± 3% cpuidle..usage
118.45 ± 2% +29.2% 153.02 ± 2% uptime.boot
0.93 -0.1 0.81 mpstat.cpu.all.irq%
6.61 ± 2% -0.7 5.87 ± 2% mpstat.cpu.all.usr%
2512245 +20.5% 3026652 ± 7% vmstat.memory.cache
544441 +186.6% 1560098 ± 2% vmstat.system.cs
119990 +178.6% 334293 ± 3% vmstat.system.in
2304 +1133.2% 28422 ± 41% meminfo.Active
2304 +1133.2% 28422 ± 41% meminfo.Active(anon)
88024 ± 4% +29.7% 114209 meminfo.AnonHugePages
2435281 +20.2% 2927665 ± 7% meminfo.Cached
350654 +141.0% 844991 ± 26% meminfo.Committed_AS
8189952 ± 11% +14.8% 9398954 ± 5% meminfo.DirectMap2M
250813 +187.6% 721280 ± 29% meminfo.Inactive
250813 +187.6% 721280 ± 29% meminfo.Inactive(anon)
31549 +263.8% 114791 ± 46% meminfo.Mapped
3074603 +15.8% 3560302 ± 6% meminfo.Memused
10473 +4701.4% 502857 ± 43% meminfo.Shmem
3077682 +16.2% 3576435 ± 5% meminfo.max_used_kB
1130190 ± 3% +42.8% 1614074 ± 3% turbostat.C1
0.60 ± 3% -0.2 0.42 ± 3% turbostat.C1%
20997 ± 75% -83.4% 3478 ± 25% turbostat.C1E
0.12 ±109% -0.1 0.01 ± 82% turbostat.C1E%
2.15 ± 6% -39.0% 1.31 ± 20% turbostat.CPU%c1
64.33 ± 5% +12.7% 72.50 ± 3% turbostat.CoreTmp
30.91 ±150% -99.3% 0.21 ± 8% turbostat.IPC
10771253 +288.2% 41809146 ± 2% turbostat.IRQ
1259955 ± 5% +116.5% 2728148 ± 5% turbostat.POLL
0.24 ± 4% +0.1 0.32 ± 6% turbostat.POLL%
0.59 ± 26% -88.4% 0.07 ±134% turbostat.Pkg%pc2
1.03 ± 90% -95.5% 0.05 ± 90% turbostat.Pkg%pc3
62.50 ± 4% +15.2% 72.00 ± 5% turbostat.PkgTmp
3.60 -24.0% 2.73 turbostat.RAMWatt
575.17 +1065.8% 6705 ± 49% proc-vmstat.nr_active_anon
60709 +1.4% 61578 proc-vmstat.nr_anon_pages
729380 -1.6% 717382 proc-vmstat.nr_dirty_background_threshold
1460544 -1.6% 1436519 proc-vmstat.nr_dirty_threshold
608819 +20.0% 730638 ± 7% proc-vmstat.nr_file_pages
7370339 -1.6% 7250228 proc-vmstat.nr_free_pages
62712 +186.1% 179444 ± 29% proc-vmstat.nr_inactive_anon
8133 +250.3% 28494 ± 46% proc-vmstat.nr_mapped
2617 +4654.3% 124436 ± 44% proc-vmstat.nr_shmem
19224 +1.3% 19478 proc-vmstat.nr_slab_reclaimable
20146 -7.5% 18626 proc-vmstat.nr_slab_unreclaimable
575.17 +1065.8% 6705 ± 49% proc-vmstat.nr_zone_active_anon
62712 +186.1% 179444 ± 29% proc-vmstat.nr_zone_inactive_anon
337848 +273.7% 1262543 ± 22% proc-vmstat.numa_hit
337954 +273.6% 1262732 ± 22% proc-vmstat.numa_local
290.17 ± 6% +17333.2% 50585 ±178% proc-vmstat.pgactivate
337910 +273.6% 1262506 ± 22% proc-vmstat.pgalloc_normal
208172 +47.8% 307671 ± 6% proc-vmstat.pgfault
341803 +31.1% 448267 ± 23% proc-vmstat.pgfree
11963 ± 3% +22.2% 14624 ± 2% proc-vmstat.pgreuse
41.33 ± 7% +38.8% 57.35 perf-stat.i.MPKI
9.472e+09 -10.1% 8.515e+09 ± 2% perf-stat.i.branch-instructions
76415175 +10.9% 84779467 ± 2% perf-stat.i.branch-misses
15.99 -9.9 6.04 ± 4% perf-stat.i.cache-miss-rate%
3.093e+08 -54.1% 1.421e+08 ± 3% perf-stat.i.cache-misses
1.95e+09 +24.7% 2.433e+09 ± 2% perf-stat.i.cache-references
560253 ± 2% +185.0% 1596715 ± 2% perf-stat.i.context-switches
1.27 ± 4% +15.2% 1.47 ± 3% perf-stat.i.cpi
63663 +9.0% 69379 ± 5% perf-stat.i.cpu-migrations
220.61 ± 13% +270.2% 816.61 ± 15% perf-stat.i.cycles-between-cache-misses
1.441e+10 -12.8% 1.257e+10 ± 2% perf-stat.i.dTLB-loads
8.621e+09 -12.9% 7.505e+09 ± 2% perf-stat.i.dTLB-stores
40563294 ± 2% -18.9% 32876799 ± 3% perf-stat.i.iTLB-load-misses
384404 ± 2% +4.7% 402492 perf-stat.i.iTLB-loads
4.879e+10 -11.4% 4.323e+10 ± 2% perf-stat.i.instructions
1218 +13.2% 1379 ± 7% perf-stat.i.instructions-per-iTLB-miss
0.80 -12.2% 0.70 ± 2% perf-stat.i.ipc
0.05 ± 77% +483.0% 0.30 ± 6% perf-stat.i.major-faults
0.03 ± 20% -100.0% 0.00 perf-stat.i.metric.G/sec
932.18 ± 2% -32.9% 625.58 ± 2% perf-stat.i.metric.K/sec
2127 -9.6% 1923 ± 2% perf-stat.i.metric.M/sec
1654 ± 2% +20.7% 1996 ± 5% perf-stat.i.minor-faults
13758518 ± 2% -55.8% 6082651 ± 3% perf-stat.i.node-loads
52659547 -57.3% 22477918 ± 4% perf-stat.i.node-stores
1654 ± 2% +20.7% 1997 ± 5% perf-stat.i.page-faults
39.96 +40.7% 56.22 perf-stat.overall.MPKI
0.81 +0.2 0.99 perf-stat.overall.branch-miss-rate%
15.86 -10.2 5.71 ± 3% perf-stat.overall.cache-miss-rate%
1.25 +14.6% 1.43 ± 2% perf-stat.overall.cpi
196.89 +126.5% 446.05 ± 4% perf-stat.overall.cycles-between-cache-misses
1203 ± 2% +10.0% 1323 ± 2% perf-stat.overall.instructions-per-iTLB-miss
0.80 -12.7% 0.70 ± 2% perf-stat.overall.ipc
9.364e+09 -10.1% 8.414e+09 ± 2% perf-stat.ps.branch-instructions
75547552 +10.1% 83192410 ± 2% perf-stat.ps.branch-misses
3.058e+08 -55.2% 1.369e+08 ± 4% perf-stat.ps.cache-misses
1.928e+09 +24.3% 2.396e+09 ± 2% perf-stat.ps.cache-references
553796 ± 2% +185.8% 1582541 ± 2% perf-stat.ps.context-switches
62898 +7.0% 67281 ± 5% perf-stat.ps.cpu-migrations
1.425e+10 -13.3% 1.235e+10 ± 2% perf-stat.ps.dTLB-loads
8.522e+09 -13.5% 7.371e+09 ± 2% perf-stat.ps.dTLB-stores
40098951 ± 2% -19.6% 32229848 ± 3% perf-stat.ps.iTLB-load-misses
4.824e+10 -11.6% 4.263e+10 ± 2% perf-stat.ps.instructions
0.05 ± 77% +509.9% 0.31 ± 6% perf-stat.ps.major-faults
1635 ± 2% +17.9% 1927 ± 5% perf-stat.ps.minor-faults
13602325 ± 2% -57.0% 5850907 ± 3% perf-stat.ps.node-loads
52060595 -58.3% 21688976 ± 5% perf-stat.ps.node-stores
1635 ± 2% +17.9% 1928 ± 5% perf-stat.ps.page-faults
4.241e+12 +23.4% 5.233e+12 perf-stat.total.instructions
34.13 ± 17% -33.7 0.41 ± 72% perf-profile.calltrace.cycles-pp.secondary_startup_64_no_verify
33.70 ± 16% -33.4 0.31 ±101% perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
33.70 ± 16% -33.4 0.31 ±101% perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
33.70 ± 16% -33.4 0.31 ±101% perf-profile.calltrace.cycles-pp.start_secondary.secondary_startup_64_no_verify
33.28 ± 16% -33.2 0.08 ±223% perf-profile.calltrace.cycles-pp.cpuidle_enter.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
33.28 ± 16% -33.2 0.08 ±223% perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.do_idle.cpu_startup_entry.start_secondary
31.16 ± 13% -31.2 0.00 perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.cpuidle_enter.do_idle.cpu_startup_entry
16.93 ± 42% -16.9 0.00 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe
16.93 ± 42% -16.9 0.00 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.arch_do_signal_or_restart.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.get_signal.arch_do_signal_or_restart.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.do_group_exit.get_signal.arch_do_signal_or_restart.exit_to_user_mode_prepare.syscall_exit_to_user_mode
12.61 ± 47% -12.6 0.00 perf-profile.calltrace.cycles-pp.do_exit.do_group_exit.get_signal.arch_do_signal_or_restart.exit_to_user_mode_prepare
9.73 ± 82% -9.7 0.00 perf-profile.calltrace.cycles-pp.mmput.do_exit.do_group_exit.get_signal.arch_do_signal_or_restart
9.73 ± 82% -9.7 0.00 perf-profile.calltrace.cycles-pp.exit_mmap.mmput.do_exit.do_group_exit.get_signal
9.25 ±102% -9.3 0.00 perf-profile.calltrace.cycles-pp._dl_catch_error
4.97 ± 97% -5.0 0.00 perf-profile.calltrace.cycles-pp.tlb_finish_mmu.exit_mmap.mmput.do_exit.do_group_exit
4.97 ± 97% -5.0 0.00 perf-profile.calltrace.cycles-pp.tlb_flush_mmu.tlb_finish_mmu.exit_mmap.mmput.do_exit
4.52 ±115% -4.5 0.00 perf-profile.calltrace.cycles-pp.__get_cpu_features
4.38 ±118% -4.4 0.00 perf-profile.calltrace.cycles-pp.release_pages.tlb_flush_mmu.tlb_finish_mmu.exit_mmap.mmput
0.00 +0.6 0.62 ± 18% perf-profile.calltrace.cycles-pp.select_idle_cpu.select_idle_sibling.select_task_rq_fair.try_to_wake_up.autoremove_wake_function
0.00 +0.6 0.62 ± 17% perf-profile.calltrace.cycles-pp.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.00 +0.6 0.62 ± 11% perf-profile.calltrace.cycles-pp.__fget_light.__fdget_pos.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +0.6 0.63 ± 7% perf-profile.calltrace.cycles-pp._raw_spin_lock.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +0.6 0.64 ± 12% perf-profile.calltrace.cycles-pp.__fdget_pos.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.00 +0.7 0.67 ± 7% perf-profile.calltrace.cycles-pp.mutex_lock.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +0.7 0.72 ± 24% perf-profile.calltrace.cycles-pp.pick_next_task_fair.__schedule.schedule.schedule_timeout.unix_stream_read_generic
0.00 +0.8 0.75 ± 12% perf-profile.calltrace.cycles-pp.security_socket_recvmsg.sock_recvmsg.sock_read_iter.new_sync_read.vfs_read
0.00 +0.8 0.83 ± 19% perf-profile.calltrace.cycles-pp.skb_queue_tail.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write
0.00 +0.8 0.84 ± 13% perf-profile.calltrace.cycles-pp.sock_recvmsg.sock_read_iter.new_sync_read.vfs_read.ksys_read
0.00 +0.8 0.84 ± 17% perf-profile.calltrace.cycles-pp.__check_object_size.skb_copy_datagram_from_iter.unix_stream_sendmsg.sock_sendmsg.sock_write_iter
0.00 +0.9 0.86 ± 19% perf-profile.calltrace.cycles-pp.skb_set_owner_w.sock_alloc_send_pskb.unix_stream_sendmsg.sock_sendmsg.sock_write_iter
0.00 +0.9 0.89 ± 4% perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.00 +1.0 0.98 ± 5% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.00 +1.0 1.04 ± 18% perf-profile.calltrace.cycles-pp.select_idle_sibling.select_task_rq_fair.try_to_wake_up.autoremove_wake_function.__wake_up_common
0.00 +1.0 1.04 ± 14% perf-profile.calltrace.cycles-pp.__slab_free.skb_release_data.consume_skb.unix_stream_read_generic.unix_stream_recvmsg
0.00 +1.1 1.06 ± 13% perf-profile.calltrace.cycles-pp.__slab_free.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +1.3 1.26 ± 14% perf-profile.calltrace.cycles-pp.select_task_rq_fair.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.00 +1.3 1.29 ± 17% perf-profile.calltrace.cycles-pp._copy_from_iter.skb_copy_datagram_from_iter.unix_stream_sendmsg.sock_sendmsg.sock_write_iter
0.00 +1.4 1.36 ± 40% perf-profile.calltrace.cycles-pp.perf_tp_event.perf_trace_sched_switch.__schedule.schedule.exit_to_user_mode_prepare
0.00 +1.4 1.39 ± 14% perf-profile.calltrace.cycles-pp.__check_object_size.simple_copy_to_iter.__skb_datagram_iter.skb_copy_datagram_iter.unix_stream_read_actor
0.00 +1.4 1.42 ± 39% perf-profile.calltrace.cycles-pp.perf_trace_sched_switch.__schedule.schedule.exit_to_user_mode_prepare.syscall_exit_to_user_mode
0.00 +1.5 1.45 ± 14% perf-profile.calltrace.cycles-pp.simple_copy_to_iter.__skb_datagram_iter.skb_copy_datagram_iter.unix_stream_read_actor.unix_stream_read_generic
0.00 +1.5 1.47 ± 19% perf-profile.calltrace.cycles-pp._raw_spin_lock.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write
0.00 +1.6 1.61 ± 16% perf-profile.calltrace.cycles-pp.__fget_light.__fdget_pos.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +1.6 1.63 ± 17% perf-profile.calltrace.cycles-pp.__fdget_pos.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
0.00 +1.6 1.64 ± 20% perf-profile.calltrace.cycles-pp.copy_user_enhanced_fast_string.copyout._copy_to_iter.__skb_datagram_iter.skb_copy_datagram_iter
0.00 +1.7 1.69 ± 20% perf-profile.calltrace.cycles-pp.sock_wfree.unix_destruct_scm.skb_release_head_state.skb_release_all.consume_skb
0.00 +1.7 1.72 ± 16% perf-profile.calltrace.cycles-pp.kmem_cache_free.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +1.7 1.74 ± 20% perf-profile.calltrace.cycles-pp.copyout._copy_to_iter.__skb_datagram_iter.skb_copy_datagram_iter.unix_stream_read_actor
0.00 +1.8 1.77 ± 25% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.__schedule.schedule.schedule_timeout
0.00 +1.8 1.83 ± 20% perf-profile.calltrace.cycles-pp.unix_destruct_scm.skb_release_head_state.skb_release_all.consume_skb.unix_stream_read_generic
0.00 +1.9 1.87 ± 25% perf-profile.calltrace.cycles-pp._raw_spin_lock.__schedule.schedule.schedule_timeout.unix_stream_read_generic
0.00 +1.9 1.88 ± 20% perf-profile.calltrace.cycles-pp.skb_release_head_state.skb_release_all.consume_skb.unix_stream_read_generic.unix_stream_recvmsg
0.00 +1.9 1.91 ± 20% perf-profile.calltrace.cycles-pp.skb_release_all.consume_skb.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter
0.00 +2.0 1.96 ± 15% perf-profile.calltrace.cycles-pp.kfree.skb_release_data.consume_skb.unix_stream_read_generic.unix_stream_recvmsg
0.00 +2.2 2.18 ± 18% perf-profile.calltrace.cycles-pp._copy_to_iter.__skb_datagram_iter.skb_copy_datagram_iter.unix_stream_read_actor.unix_stream_read_generic
0.00 +2.3 2.28 ± 17% perf-profile.calltrace.cycles-pp.skb_copy_datagram_from_iter.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write
0.00 +2.5 2.49 ± 41% perf-profile.calltrace.cycles-pp.__schedule.schedule.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
0.00 +2.5 2.52 ± 40% perf-profile.calltrace.cycles-pp.schedule.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +2.8 2.75 ± 38% perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
0.00 +2.8 2.76 ± 17% perf-profile.calltrace.cycles-pp.kmem_cache_alloc_node.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb.unix_stream_sendmsg
0.00 +2.8 2.85 ± 37% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
0.00 +3.2 3.19 ± 15% perf-profile.calltrace.cycles-pp.__kmalloc_node_track_caller.kmalloc_reserve.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb
0.00 +3.2 3.19 ± 15% perf-profile.calltrace.cycles-pp.skb_release_data.consume_skb.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter
0.00 +3.3 3.33 ± 15% perf-profile.calltrace.cycles-pp.kmalloc_reserve.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb.unix_stream_sendmsg
0.00 +3.7 3.70 ± 23% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.try_to_wake_up.autoremove_wake_function.__wake_up_common
0.00 +3.7 3.75 ± 16% perf-profile.calltrace.cycles-pp.__skb_datagram_iter.skb_copy_datagram_iter.unix_stream_read_actor.unix_stream_read_generic.unix_stream_recvmsg
0.00 +3.8 3.79 ± 16% perf-profile.calltrace.cycles-pp.skb_copy_datagram_iter.unix_stream_read_actor.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter
0.00 +3.8 3.83 ± 16% perf-profile.calltrace.cycles-pp.unix_stream_read_actor.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +3.9 3.88 ± 22% perf-profile.calltrace.cycles-pp._raw_spin_lock.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.00 +4.8 4.77 ± 25% perf-profile.calltrace.cycles-pp.perf_tp_event.perf_trace_sched_switch.__schedule.schedule.schedule_timeout
0.00 +5.0 4.96 ± 25% perf-profile.calltrace.cycles-pp.perf_trace_sched_switch.__schedule.schedule.schedule_timeout.unix_stream_read_generic
0.00 +5.2 5.21 ± 22% perf-profile.calltrace.cycles-pp.perf_event_output_forward.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_wakeup_template
0.00 +5.2 5.24 ± 22% perf-profile.calltrace.cycles-pp.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_wakeup_template.try_to_wake_up
0.00 +5.2 5.25 ± 22% perf-profile.calltrace.cycles-pp.perf_swevent_overflow.perf_tp_event.perf_trace_sched_wakeup_template.try_to_wake_up.autoremove_wake_function
0.00 +5.3 5.32 ± 23% perf-profile.calltrace.cycles-pp.perf_swevent_overflow.perf_tp_event.perf_trace_sched_stat_runtime.update_curr.dequeue_entity
0.00 +5.4 5.38 ± 17% perf-profile.calltrace.cycles-pp.consume_skb.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.00 +5.4 5.39 ± 24% perf-profile.calltrace.cycles-pp.perf_swevent_overflow.perf_tp_event.perf_trace_sched_stat_runtime.update_curr.enqueue_entity
0.43 ±223% +5.5 5.88 ± 27% perf-profile.calltrace.cycles-pp.perf_event_output_forward.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_switch
0.00 +5.5 5.48 ± 23% perf-profile.calltrace.cycles-pp.perf_tp_event.perf_trace_sched_stat_runtime.update_curr.dequeue_entity.dequeue_task_fair
0.43 ±223% +5.5 5.92 ± 27% perf-profile.calltrace.cycles-pp.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_switch.__schedule
0.00 +5.5 5.54 ± 24% perf-profile.calltrace.cycles-pp.perf_tp_event.perf_trace_sched_stat_runtime.update_curr.enqueue_entity.enqueue_task_fair
0.00 +5.6 5.57 ± 21% perf-profile.calltrace.cycles-pp.perf_tp_event.perf_trace_sched_wakeup_template.try_to_wake_up.autoremove_wake_function.__wake_up_common
0.00 +5.6 5.60 ± 23% perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.dequeue_entity.dequeue_task_fair.__schedule
0.00 +5.7 5.72 ± 24% perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.enqueue_entity.enqueue_task_fair.ttwu_do_activate
0.00 +5.7 5.74 ± 21% perf-profile.calltrace.cycles-pp.perf_trace_sched_wakeup_template.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.00 +5.8 5.80 ± 30% perf-profile.calltrace.cycles-pp.unwind_next_frame.__unwind_start.perf_callchain_kernel.get_perf_callchain.perf_callchain
0.00 +5.9 5.93 ± 27% perf-profile.calltrace.cycles-pp.perf_swevent_overflow.perf_tp_event.perf_trace_sched_switch.__schedule.schedule
0.00 +6.0 6.02 ± 22% perf-profile.calltrace.cycles-pp.update_curr.dequeue_entity.dequeue_task_fair.__schedule.schedule
0.00 +6.2 6.19 ± 23% perf-profile.calltrace.cycles-pp.update_curr.enqueue_entity.enqueue_task_fair.ttwu_do_activate.try_to_wake_up
0.43 ±223% +6.3 6.71 ± 29% perf-profile.calltrace.cycles-pp.unwind_next_frame.perf_callchain_kernel.get_perf_callchain.perf_callchain.perf_prepare_sample
0.00 +6.4 6.44 ± 22% perf-profile.calltrace.cycles-pp.dequeue_entity.dequeue_task_fair.__schedule.schedule.schedule_timeout
0.00 +6.7 6.65 ± 29% perf-profile.calltrace.cycles-pp.__unwind_start.perf_callchain_kernel.get_perf_callchain.perf_callchain.perf_prepare_sample
0.00 +6.9 6.86 ± 33% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock_irqsave.__wake_up_common_lock.sock_def_readable.unix_stream_sendmsg
0.00 +7.1 7.07 ± 32% perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__wake_up_common_lock.sock_def_readable.unix_stream_sendmsg.sock_sendmsg
0.00 +7.2 7.19 ± 22% perf-profile.calltrace.cycles-pp.enqueue_entity.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function
0.00 +7.2 7.21 ± 16% perf-profile.calltrace.cycles-pp.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb.unix_stream_sendmsg.sock_sendmsg
0.00 +7.3 7.34 ± 21% perf-profile.calltrace.cycles-pp.dequeue_task_fair.__schedule.schedule.schedule_timeout.unix_stream_read_generic
0.00 +7.3 7.35 ± 16% perf-profile.calltrace.cycles-pp.alloc_skb_with_frags.sock_alloc_send_pskb.unix_stream_sendmsg.sock_sendmsg.sock_write_iter
0.00 +8.1 8.08 ± 22% perf-profile.calltrace.cycles-pp.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common
0.00 +8.1 8.13 ± 22% perf-profile.calltrace.cycles-pp.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.00 +9.2 9.20 ± 17% perf-profile.calltrace.cycles-pp.sock_alloc_send_pskb.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write
0.00 +10.6 10.62 ± 23% perf-profile.calltrace.cycles-pp.perf_event_output_forward.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_stat_runtime
0.00 +10.7 10.68 ± 23% perf-profile.calltrace.cycles-pp.__perf_event_overflow.perf_swevent_overflow.perf_tp_event.perf_trace_sched_stat_runtime.update_curr
0.00 +16.2 16.16 ± 22% perf-profile.calltrace.cycles-pp.__schedule.schedule.schedule_timeout.unix_stream_read_generic.unix_stream_recvmsg
0.00 +16.3 16.26 ± 22% perf-profile.calltrace.cycles-pp.schedule.schedule_timeout.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter
0.00 +16.4 16.40 ± 22% perf-profile.calltrace.cycles-pp.schedule_timeout.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read
0.43 ±223% +17.1 17.53 ± 27% perf-profile.calltrace.cycles-pp.perf_callchain_kernel.get_perf_callchain.perf_callchain.perf_prepare_sample.perf_event_output_forward
0.43 ±223% +18.4 18.82 ± 27% perf-profile.calltrace.cycles-pp.get_perf_callchain.perf_callchain.perf_prepare_sample.perf_event_output_forward.__perf_event_overflow
0.43 ±223% +18.5 18.93 ± 27% perf-profile.calltrace.cycles-pp.perf_callchain.perf_prepare_sample.perf_event_output_forward.__perf_event_overflow.perf_swevent_overflow
0.43 ±223% +19.7 20.08 ± 27% perf-profile.calltrace.cycles-pp.perf_prepare_sample.perf_event_output_forward.__perf_event_overflow.perf_swevent_overflow.perf_tp_event
0.00 +20.9 20.85 ± 20% perf-profile.calltrace.cycles-pp.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable
0.00 +21.0 21.00 ± 20% perf-profile.calltrace.cycles-pp.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable.unix_stream_sendmsg
0.00 +21.2 21.22 ± 20% perf-profile.calltrace.cycles-pp.__wake_up_common.__wake_up_common_lock.sock_def_readable.unix_stream_sendmsg.sock_sendmsg
0.00 +28.8 28.83 ± 10% perf-profile.calltrace.cycles-pp.__wake_up_common_lock.sock_def_readable.unix_stream_sendmsg.sock_sendmsg.sock_write_iter
0.00 +29.6 29.63 ± 9% perf-profile.calltrace.cycles-pp.sock_def_readable.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write
5.71 ±153% +30.5 36.18 ± 3% perf-profile.calltrace.cycles-pp.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
5.71 ±153% +31.5 37.18 ± 3% perf-profile.calltrace.cycles-pp.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
5.71 ±153% +32.7 38.42 ± 3% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.00 +32.8 32.80 ± 4% perf-profile.calltrace.cycles-pp.unix_stream_read_generic.unix_stream_recvmsg.sock_read_iter.new_sync_read.vfs_read
5.71 ±153% +32.9 38.64 ± 2% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.read
0.00 +33.1 33.11 ± 4% perf-profile.calltrace.cycles-pp.unix_stream_recvmsg.sock_read_iter.new_sync_read.vfs_read.ksys_read
1.02 ±144% +33.9 34.90 ± 3% perf-profile.calltrace.cycles-pp.new_sync_read.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +34.5 34.47 ± 4% perf-profile.calltrace.cycles-pp.sock_read_iter.new_sync_read.vfs_read.ksys_read.do_syscall_64
5.71 ±153% +34.7 40.45 ± 2% perf-profile.calltrace.cycles-pp.read
3.61 ± 80% +44.6 48.16 ± 2% perf-profile.calltrace.cycles-pp.new_sync_write.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +45.6 45.63 ± 2% perf-profile.calltrace.cycles-pp.unix_stream_sendmsg.sock_sendmsg.sock_write_iter.new_sync_write.vfs_write
3.61 ± 80% +46.2 49.86 ± 2% perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
0.00 +46.4 46.45 ± 2% perf-profile.calltrace.cycles-pp.sock_sendmsg.sock_write_iter.new_sync_write.vfs_write.ksys_write
0.00 +47.1 47.11 ± 2% perf-profile.calltrace.cycles-pp.sock_write_iter.new_sync_write.vfs_write.ksys_write.do_syscall_64
3.61 ± 80% +48.4 52.04 ± 3% perf-profile.calltrace.cycles-pp.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
3.61 ± 80% +51.6 55.17 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
3.61 ± 80% +51.8 55.43 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.write
3.61 ± 80% +53.2 56.77 perf-profile.calltrace.cycles-pp.write
34.13 ± 17% -33.6 0.51 ± 37% perf-profile.children.cycles-pp.do_idle
34.13 ± 17% -33.6 0.51 ± 37% perf-profile.children.cycles-pp.secondary_startup_64_no_verify
34.13 ± 17% -33.6 0.51 ± 37% perf-profile.children.cycles-pp.cpu_startup_entry
33.70 ± 16% -33.2 0.49 ± 37% perf-profile.children.cycles-pp.start_secondary
33.28 ± 16% -33.0 0.33 ± 38% perf-profile.children.cycles-pp.cpuidle_enter_state
33.28 ± 16% -32.9 0.33 ± 38% perf-profile.children.cycles-pp.cpuidle_enter
31.16 ± 13% -31.0 0.19 ± 36% perf-profile.children.cycles-pp.intel_idle
16.93 ± 42% -16.9 0.00 perf-profile.children.cycles-pp.do_group_exit
16.93 ± 42% -16.9 0.00 perf-profile.children.cycles-pp.do_exit
14.48 ± 62% -14.5 0.00 perf-profile.children.cycles-pp.mmput
14.48 ± 62% -14.5 0.00 perf-profile.children.cycles-pp.exit_mmap
12.61 ± 47% -12.6 0.00 perf-profile.children.cycles-pp.arch_do_signal_or_restart
12.61 ± 47% -12.6 0.00 perf-profile.children.cycles-pp.get_signal
12.58 ±116% -12.6 0.00 perf-profile.children.cycles-pp._dl_catch_error
14.84 ± 36% -11.0 3.87 ± 26% perf-profile.children.cycles-pp.syscall_exit_to_user_mode
14.41 ± 36% -10.7 3.68 ± 27% perf-profile.children.cycles-pp.exit_to_user_mode_prepare
8.25 ±115% -8.3 0.00 perf-profile.children.cycles-pp.do_mmap
8.25 ±115% -8.3 0.00 perf-profile.children.cycles-pp.mmap_region
8.25 ±115% -8.3 0.00 perf-profile.children.cycles-pp.vm_mmap_pgoff
7.28 ±119% -7.3 0.00 perf-profile.children.cycles-pp.asm_exc_page_fault
5.40 ± 83% -5.4 0.00 perf-profile.children.cycles-pp.ksys_mmap_pgoff
4.97 ± 97% -5.0 0.00 perf-profile.children.cycles-pp.tlb_finish_mmu
4.97 ± 97% -5.0 0.00 perf-profile.children.cycles-pp.tlb_flush_mmu
4.52 ±115% -4.5 0.00 perf-profile.children.cycles-pp.__get_cpu_features
4.38 ±118% -4.4 0.00 perf-profile.children.cycles-pp.release_pages
4.19 ±122% -4.2 0.00 perf-profile.children.cycles-pp.walk_component
0.00 +0.1 0.06 ± 19% perf-profile.children.cycles-pp.cgroup_rstat_updated
0.00 +0.1 0.06 ± 19% perf-profile.children.cycles-pp.is_ftrace_trampoline
0.00 +0.1 0.06 ± 11% perf-profile.children.cycles-pp.__cgroup_account_cputime
0.00 +0.1 0.07 ± 28% perf-profile.children.cycles-pp.rb_next
0.00 +0.1 0.07 ± 31% perf-profile.children.cycles-pp.perf_swevent_get_recursion_context
0.00 +0.1 0.07 ± 21% perf-profile.children.cycles-pp.iov_iter_init
0.00 +0.1 0.07 ± 18% perf-profile.children.cycles-pp.apparmor_socket_recvmsg
0.00 +0.1 0.07 ± 17% perf-profile.children.cycles-pp.put_pid
0.00 +0.1 0.07 ± 25% perf-profile.children.cycles-pp.__x2apic_send_IPI_dest
0.00 +0.1 0.07 ± 17% perf-profile.children.cycles-pp.resched_curr
0.00 +0.1 0.08 ± 18% perf-profile.children.cycles-pp.apparmor_file_permission
0.00 +0.1 0.08 ± 22% perf-profile.children.cycles-pp.cpumask_next
0.00 +0.1 0.08 ± 20% perf-profile.children.cycles-pp.get_stack_info_noinstr
0.00 +0.1 0.08 ± 17% perf-profile.children.cycles-pp.refill_stock
0.00 +0.1 0.09 ± 8% perf-profile.children.cycles-pp.maybe_add_creds
0.00 +0.1 0.09 ± 15% perf-profile.children.cycles-pp.__x64_sys_read
0.00 +0.1 0.09 ± 15% perf-profile.children.cycles-pp.__wrgsbase_inactive
0.00 +0.1 0.09 ± 21% perf-profile.children.cycles-pp.finish_wait
0.00 +0.1 0.09 ± 11% perf-profile.children.cycles-pp.__x64_sys_write
0.00 +0.1 0.10 ± 10% perf-profile.children.cycles-pp.start_thread
0.00 +0.1 0.10 ± 22% perf-profile.children.cycles-pp.__is_insn_slot_addr
0.00 +0.1 0.10 ± 23% perf-profile.children.cycles-pp.perf_instruction_pointer
0.00 +0.1 0.10 ± 13% perf-profile.children.cycles-pp.try_charge_memcg
0.00 +0.1 0.10 ± 12% perf-profile.children.cycles-pp.cpumask_next_wrap
0.00 +0.1 0.10 ± 16% perf-profile.children.cycles-pp.switch_mm_irqs_off
0.00 +0.1 0.10 ± 32% perf-profile.children.cycles-pp.sysvec_reschedule_ipi
0.00 +0.1 0.11 ± 21% perf-profile.children.cycles-pp.__build_skb_around
0.00 +0.1 0.11 ± 20% perf-profile.children.cycles-pp._find_next_bit
0.00 +0.1 0.11 ± 11% perf-profile.children.cycles-pp.perf_misc_flags
0.00 +0.1 0.11 ± 11% perf-profile.children.cycles-pp.rb_insert_color
0.00 +0.1 0.11 ± 9% perf-profile.children.cycles-pp.get_mem_cgroup_from_objcg
0.00 +0.1 0.12 ± 10% perf-profile.children.cycles-pp.apparmor_socket_getpeersec_dgram
0.00 +0.1 0.12 ± 21% perf-profile.children.cycles-pp.kmalloc_slab
0.00 +0.1 0.12 ± 31% perf-profile.children.cycles-pp.in_gate_area_no_mm
0.00 +0.1 0.12 ± 31% perf-profile.children.cycles-pp.__unfreeze_partials
0.00 +0.1 0.13 ± 21% perf-profile.children.cycles-pp.propagate_protected_usage
0.00 +0.1 0.13 ± 16% perf-profile.children.cycles-pp.syscall_enter_from_user_mode
0.00 +0.1 0.14 ± 20% perf-profile.children.cycles-pp.mem_cgroup_from_task
0.00 +0.1 0.14 ± 21% perf-profile.children.cycles-pp.get_stack_info
0.00 +0.1 0.14 ± 20% perf-profile.children.cycles-pp.tracing_gen_ctx_irq_test
0.00 +0.1 0.14 ± 22% perf-profile.children.cycles-pp.set_next_buddy
0.00 +0.1 0.15 ± 29% perf-profile.children.cycles-pp.rb_erase
0.00 +0.2 0.15 ± 30% perf-profile.children.cycles-pp.get_partial_node
0.00 +0.2 0.15 ± 28% perf-profile.children.cycles-pp.get_callchain_entry
0.00 +0.2 0.16 ± 21% perf-profile.children.cycles-pp.pick_next_entity
0.00 +0.2 0.16 ± 17% perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
0.00 +0.2 0.17 ± 16% perf-profile.children.cycles-pp.check_stack_object
0.00 +0.2 0.17 ± 24% perf-profile.children.cycles-pp.perf_trace_buf_alloc
0.00 +0.2 0.17 ± 51% perf-profile.children.cycles-pp.bpf_ksym_find
0.00 +0.2 0.18 ± 14% perf-profile.children.cycles-pp.refill_obj_stock
0.00 +0.2 0.18 ± 18% perf-profile.children.cycles-pp.__ksize
0.00 +0.2 0.18 ± 17% perf-profile.children.cycles-pp.___perf_sw_event
0.00 +0.2 0.20 ± 17% perf-profile.children.cycles-pp.kvm_guest_state
0.00 +0.2 0.21 ± 46% perf-profile.children.cycles-pp.is_bpf_text_address
0.00 +0.2 0.22 ± 8% perf-profile.children.cycles-pp.task_tick_fair
0.00 +0.2 0.22 ± 24% perf-profile.children.cycles-pp.put_prev_entity
0.00 +0.2 0.22 ± 26% perf-profile.children.cycles-pp.__calc_delta
0.00 +0.2 0.22 ± 30% perf-profile.children.cycles-pp.perf_tp_event_match
0.00 +0.2 0.23 ± 22% perf-profile.children.cycles-pp.perf_trace_buf_update
0.00 +0.2 0.24 ± 16% perf-profile.children.cycles-pp.update_min_vruntime
0.00 +0.2 0.24 ± 9% perf-profile.children.cycles-pp.security_socket_getpeersec_dgram
0.00 +0.2 0.24 ± 24% perf-profile.children.cycles-pp.save_fpregs_to_fpstate
0.00 +0.3 0.25 ± 72% perf-profile.children.cycles-pp.process_simple
0.00 +0.3 0.26 ± 18% perf-profile.children.cycles-pp.__switch_to
0.00 +0.3 0.26 ± 19% perf-profile.children.cycles-pp.update_cfs_group
0.00 +0.3 0.26 ± 6% perf-profile.children.cycles-pp.scheduler_tick
0.00 +0.3 0.27 ± 14% perf-profile.children.cycles-pp.rcu_all_qs
0.00 +0.3 0.27 ± 14% perf-profile.children.cycles-pp.perf_trace_sched_migrate_task
0.00 +0.3 0.28 ± 71% perf-profile.children.cycles-pp.record__finish_output
0.00 +0.3 0.28 ± 71% perf-profile.children.cycles-pp.perf_session__process_events
0.00 +0.3 0.29 ± 69% perf-profile.children.cycles-pp.__libc_start_main
0.00 +0.3 0.29 ± 69% perf-profile.children.cycles-pp.main
0.00 +0.3 0.29 ± 69% perf-profile.children.cycles-pp.run_builtin
0.00 +0.3 0.29 ± 69% perf-profile.children.cycles-pp.cmd_record
0.00 +0.3 0.29 ± 69% perf-profile.children.cycles-pp.cmd_sched
0.00 +0.3 0.29 ± 5% perf-profile.children.cycles-pp.update_process_times
0.00 +0.3 0.30 ± 6% perf-profile.children.cycles-pp.tick_sched_handle
0.00 +0.3 0.30 ± 14% perf-profile.children.cycles-pp.finish_task_switch
0.00 +0.3 0.31 ± 6% perf-profile.children.cycles-pp.tick_sched_timer
0.00 +0.3 0.31 ± 12% perf-profile.children.cycles-pp.set_task_cpu
0.00 +0.3 0.32 ± 22% perf-profile.children.cycles-pp.cpuacct_charge
0.00 +0.3 0.32 ± 10% perf-profile.children.cycles-pp.preempt_schedule_common
0.00 +0.3 0.33 ± 21% perf-profile.children.cycles-pp.native_sched_clock
0.00 +0.3 0.34 ± 15% perf-profile.children.cycles-pp.available_idle_cpu
0.00 +0.3 0.34 ± 18% perf-profile.children.cycles-pp.aa_file_perm
0.00 +0.3 0.35 ± 26% perf-profile.children.cycles-pp.__task_pid_nr_ns
0.00 +0.4 0.35 ± 20% perf-profile.children.cycles-pp.__update_load_avg_cfs_rq
0.00 +0.4 0.36 ± 5% perf-profile.children.cycles-pp.__hrtimer_run_queues
0.00 +0.4 0.37 ± 24% perf-profile.children.cycles-pp.asm_sysvec_reschedule_ipi
0.00 +0.4 0.37 ± 21% perf-profile.children.cycles-pp.sched_clock_cpu
0.00 +0.4 0.38 ± 23% perf-profile.children.cycles-pp.__mod_memcg_lruvec_state
0.00 +0.4 0.39 ± 16% perf-profile.children.cycles-pp.wait_for_unix_gc
0.00 +0.4 0.41 ± 25% perf-profile.children.cycles-pp.perf_event_pid_type
0.00 +0.4 0.41 ± 23% perf-profile.children.cycles-pp.__update_load_avg_se
0.00 +0.4 0.42 ± 16% perf-profile.children.cycles-pp.__might_fault
0.00 +0.4 0.43 ± 15% perf-profile.children.cycles-pp.__switch_to_asm
0.00 +0.4 0.43 ± 5% perf-profile.children.cycles-pp.hrtimer_interrupt
0.00 +0.4 0.44 ± 28% perf-profile.children.cycles-pp.__enqueue_entity
0.00 +0.4 0.44 ± 20% perf-profile.children.cycles-pp.restore_fpregs_from_fpstate
0.00 +0.4 0.45 ± 16% perf-profile.children.cycles-pp.__pthread_enable_asynccancel
0.00 +0.5 0.45 ± 12% perf-profile.children.cycles-pp.skb_unlink
0.00 +0.5 0.46 ± 29% perf-profile.children.cycles-pp.perf_output_begin_forward
0.00 +0.5 0.48 ± 23% perf-profile.children.cycles-pp.ftrace_graph_ret_addr
0.00 +0.5 0.49 ± 26% perf-profile.children.cycles-pp.__get_user_nocheck_8
0.00 +0.5 0.49 ± 18% perf-profile.children.cycles-pp.update_rq_clock
0.00 +0.5 0.49 ± 18% perf-profile.children.cycles-pp.check_preempt_wakeup
0.00 +0.5 0.52 ± 11% perf-profile.children.cycles-pp.__virt_addr_valid
0.00 +0.5 0.53 ± 13% perf-profile.children.cycles-pp.__might_sleep
0.00 +0.5 0.54 ± 24% perf-profile.children.cycles-pp.set_next_entity
0.00 +0.5 0.54 ± 18% perf-profile.children.cycles-pp.page_counter_cancel
0.00 +0.6 0.56 ± 20% perf-profile.children.cycles-pp.page_counter_charge
0.00 +0.6 0.60 ± 18% perf-profile.children.cycles-pp.switch_fpu_return
0.00 +0.6 0.60 ± 16% perf-profile.children.cycles-pp.check_preempt_curr
0.00 +0.6 0.60 ± 22% perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
0.00 +0.6 0.61 ± 23% perf-profile.children.cycles-pp.reweight_entity
0.00 +0.6 0.62 ± 12% perf-profile.children.cycles-pp.__pthread_disable_asynccancel
0.00 +0.6 0.62 ± 18% perf-profile.children.cycles-pp.page_counter_uncharge
0.00 +0.6 0.64 ± 17% perf-profile.children.cycles-pp.ttwu_do_wakeup
0.00 +0.6 0.64 ± 17% perf-profile.children.cycles-pp.select_idle_cpu
0.00 +0.7 0.65 ± 12% perf-profile.children.cycles-pp.security_socket_sendmsg
0.00 +0.7 0.66 ± 37% perf-profile.children.cycles-pp.memcpy_erms
0.00 +0.7 0.68 ± 26% perf-profile.children.cycles-pp.core_kernel_text
0.00 +0.7 0.68 ± 23% perf-profile.children.cycles-pp.___slab_alloc
0.00 +0.7 0.71 ± 8% perf-profile.children.cycles-pp.mutex_lock
0.00 +0.7 0.73 ± 37% perf-profile.children.cycles-pp.perf_output_copy
0.00 +0.7 0.74 ± 26% perf-profile.children.cycles-pp.__perf_event_header__init_id
0.00 +0.7 0.74 ± 23% perf-profile.children.cycles-pp.__slab_alloc
0.00 +0.7 0.75 ± 17% perf-profile.children.cycles-pp.obj_cgroup_charge_pages
0.00 +0.7 0.75 ± 17% perf-profile.children.cycles-pp.obj_cgroup_uncharge_pages
0.00 +0.7 0.75 ± 26% perf-profile.children.cycles-pp.perf_callchain_user
0.00 +0.8 0.76 ± 13% perf-profile.children.cycles-pp.security_socket_recvmsg
0.00 +0.8 0.80 ± 27% perf-profile.children.cycles-pp.unix_write_space
0.00 +0.8 0.82 ± 8% perf-profile.children.cycles-pp.prepare_to_wait
0.00 +0.8 0.84 ± 19% perf-profile.children.cycles-pp.skb_queue_tail
0.00 +0.8 0.85 ± 13% perf-profile.children.cycles-pp.sock_recvmsg
0.00 +0.9 0.86 ± 19% perf-profile.children.cycles-pp.skb_set_owner_w
0.00 +1.0 0.96 ± 13% perf-profile.children.cycles-pp.get_obj_cgroup_from_current
0.00 +1.0 1.00 ± 19% perf-profile.children.cycles-pp.obj_cgroup_charge
0.00 +1.1 1.07 ± 37% perf-profile.children.cycles-pp.perf_output_sample
0.00 +1.1 1.07 ± 20% perf-profile.children.cycles-pp.mod_objcg_state
0.00 +1.1 1.07 ± 17% perf-profile.children.cycles-pp.select_idle_sibling
0.00 +1.1 1.08 ± 14% perf-profile.children.cycles-pp.common_file_perm
0.00 +1.1 1.15 ± 12% perf-profile.children.cycles-pp.aa_sk_perm
0.00 +1.2 1.19 ± 13% perf-profile.children.cycles-pp.syscall_return_via_sysret
0.00 +1.2 1.22 ± 17% perf-profile.children.cycles-pp.update_load_avg
0.00 +1.3 1.30 ± 17% perf-profile.children.cycles-pp._copy_from_iter
0.00 +1.3 1.30 ± 13% perf-profile.children.cycles-pp.select_task_rq_fair
0.00 +1.3 1.31 ± 14% perf-profile.children.cycles-pp.security_file_permission
0.00 +1.4 1.38 ± 22% perf-profile.children.cycles-pp.pick_next_task_fair
0.00 +1.5 1.46 ± 14% perf-profile.children.cycles-pp.simple_copy_to_iter
0.00 +1.5 1.51 ± 26% perf-profile.children.cycles-pp.stack_access_ok
0.00 +1.7 1.68 ± 28% perf-profile.children.cycles-pp.kernel_text_address
0.00 +1.7 1.69 ± 20% perf-profile.children.cycles-pp.sock_wfree
0.00 +1.7 1.74 ± 20% perf-profile.children.cycles-pp.copyout
0.00 +1.8 1.85 ± 20% perf-profile.children.cycles-pp.unix_destruct_scm
0.00 +1.9 1.89 ± 20% perf-profile.children.cycles-pp.skb_release_head_state
0.00 +1.9 1.91 ± 20% perf-profile.children.cycles-pp.skb_release_all
0.00 +2.0 2.04 ± 27% perf-profile.children.cycles-pp.__kernel_text_address
0.00 +2.1 2.12 ± 13% perf-profile.children.cycles-pp.__slab_free
0.00 +2.2 2.19 ± 18% perf-profile.children.cycles-pp._copy_to_iter
0.00 +2.2 2.24 ± 15% perf-profile.children.cycles-pp.__fget_light
0.00 +2.3 2.30 ± 17% perf-profile.children.cycles-pp.skb_copy_datagram_from_iter
0.00 +2.3 2.31 ± 15% perf-profile.children.cycles-pp.__fdget_pos
0.00 +2.4 2.35 ± 24% perf-profile.children.cycles-pp.orc_find
0.00 +2.5 2.50 ± 27% perf-profile.children.cycles-pp.unwind_get_return_address
0.00 +2.8 2.82 ± 17% perf-profile.children.cycles-pp.kmem_cache_alloc_node
0.00 +3.2 3.20 ± 15% perf-profile.children.cycles-pp.skb_release_data
0.00 +3.3 3.26 ± 15% perf-profile.children.cycles-pp.__kmalloc_node_track_caller
0.00 +3.4 3.35 ± 15% perf-profile.children.cycles-pp.kmalloc_reserve
0.00 +3.7 3.70 ± 23% perf-profile.children.cycles-pp.__orc_find
0.00 +3.8 3.76 ± 16% perf-profile.children.cycles-pp.__skb_datagram_iter
0.00 +3.8 3.80 ± 16% perf-profile.children.cycles-pp.skb_copy_datagram_iter
0.00 +3.8 3.83 ± 16% perf-profile.children.cycles-pp.unix_stream_read_actor
0.00 +5.4 5.38 ± 17% perf-profile.children.cycles-pp.consume_skb
0.00 +5.9 5.91 ± 20% perf-profile.children.cycles-pp.perf_trace_sched_wakeup_template
0.43 ±223% +6.4 6.86 ± 24% perf-profile.children.cycles-pp.perf_trace_sched_switch
0.00 +6.6 6.61 ± 20% perf-profile.children.cycles-pp.dequeue_entity
0.00 +7.2 7.24 ± 16% perf-profile.children.cycles-pp.__alloc_skb
0.00 +7.4 7.36 ± 16% perf-profile.children.cycles-pp.alloc_skb_with_frags
0.00 +7.4 7.39 ± 24% perf-profile.children.cycles-pp.__unwind_start
0.00 +7.4 7.40 ± 21% perf-profile.children.cycles-pp.enqueue_entity
0.00 +7.5 7.53 ± 20% perf-profile.children.cycles-pp.dequeue_task_fair
0.43 ±223% +8.0 8.41 ± 13% perf-profile.children.cycles-pp._raw_spin_lock
0.00 +8.3 8.31 ± 20% perf-profile.children.cycles-pp.enqueue_task_fair
0.00 +8.4 8.36 ± 20% perf-profile.children.cycles-pp.ttwu_do_activate
0.00 +9.2 9.21 ± 17% perf-profile.children.cycles-pp.sock_alloc_send_pskb
0.00 +9.2 9.22 ± 27% perf-profile.children.cycles-pp._raw_spin_lock_irqsave
0.00 +11.8 11.81 ± 22% perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime
0.00 +13.2 13.17 ± 21% perf-profile.children.cycles-pp.update_curr
0.00 +13.5 13.47 ± 10% perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
0.43 ±223% +13.9 14.30 ± 24% perf-profile.children.cycles-pp.unwind_next_frame
0.00 +16.8 16.80 ± 21% perf-profile.children.cycles-pp.schedule_timeout
0.43 ±223% +18.3 18.72 ± 24% perf-profile.children.cycles-pp.perf_callchain_kernel
0.86 ±223% +19.0 19.87 ± 22% perf-profile.children.cycles-pp.__schedule
0.43 ±223% +19.5 19.91 ± 24% perf-profile.children.cycles-pp.get_perf_callchain
0.00 +19.5 19.55 ± 22% perf-profile.children.cycles-pp.schedule
0.43 ±223% +19.6 20.04 ± 24% perf-profile.children.cycles-pp.perf_callchain
0.43 ±223% +20.9 21.29 ± 24% perf-profile.children.cycles-pp.perf_prepare_sample
0.48 ±223% +21.3 21.75 ± 19% perf-profile.children.cycles-pp.__wake_up_common
0.00 +21.4 21.38 ± 19% perf-profile.children.cycles-pp.try_to_wake_up
0.00 +21.5 21.54 ± 19% perf-profile.children.cycles-pp.autoremove_wake_function
0.43 ±223% +22.6 23.01 ± 22% perf-profile.children.cycles-pp.perf_event_output_forward
0.43 ±223% +22.7 23.13 ± 22% perf-profile.children.cycles-pp.__perf_event_overflow
0.43 ±223% +22.8 23.18 ± 22% perf-profile.children.cycles-pp.perf_swevent_overflow
0.43 ±223% +23.6 24.06 ± 22% perf-profile.children.cycles-pp.perf_tp_event
0.48 ±223% +28.9 29.38 ± 9% perf-profile.children.cycles-pp.__wake_up_common_lock
0.00 +29.6 29.64 ± 9% perf-profile.children.cycles-pp.sock_def_readable
5.71 ±153% +30.5 36.21 ± 3% perf-profile.children.cycles-pp.vfs_read
5.71 ±153% +31.5 37.22 ± 3% perf-profile.children.cycles-pp.ksys_read
0.00 +32.9 32.85 ± 4% perf-profile.children.cycles-pp.unix_stream_read_generic
0.00 +33.1 33.12 ± 4% perf-profile.children.cycles-pp.unix_stream_recvmsg
1.02 ±144% +33.9 34.93 ± 3% perf-profile.children.cycles-pp.new_sync_read
0.00 +34.5 34.48 ± 4% perf-profile.children.cycles-pp.sock_read_iter
5.71 ±153% +35.1 40.85 ± 2% perf-profile.children.cycles-pp.read
3.61 ± 80% +44.6 48.20 ± 2% perf-profile.children.cycles-pp.new_sync_write
0.00 +45.7 45.70 ± 2% perf-profile.children.cycles-pp.unix_stream_sendmsg
47.86 ± 25% +45.9 93.73 perf-profile.children.cycles-pp.do_syscall_64
3.61 ± 80% +46.3 49.90 ± 2% perf-profile.children.cycles-pp.vfs_write
47.86 ± 25% +46.3 94.19 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
0.00 +46.5 46.49 ± 2% perf-profile.children.cycles-pp.sock_sendmsg
0.00 +47.1 47.12 ± 2% perf-profile.children.cycles-pp.sock_write_iter
3.61 ± 80% +48.5 52.08 ± 3% perf-profile.children.cycles-pp.ksys_write
3.61 ? 80% +53.6 57.18 perf-profile.children.cycles-pp.write
31.16 ± 13% -31.0 0.19 ± 36% perf-profile.self.cycles-pp.intel_idle
0.00 +0.1 0.06 ± 16% perf-profile.self.cycles-pp.cpumask_next_wrap
0.00 +0.1 0.06 ± 14% perf-profile.self.cycles-pp.autoremove_wake_function
0.00 +0.1 0.06 ± 17% perf-profile.self.cycles-pp.apparmor_socket_recvmsg
0.00 +0.1 0.07 ± 16% perf-profile.self.cycles-pp.skb_unlink
0.00 +0.1 0.07 ± 23% perf-profile.self.cycles-pp.rb_next
0.00 +0.1 0.07 ± 11% perf-profile.self.cycles-pp.try_charge_memcg
0.00 +0.1 0.07 ± 23% perf-profile.self.cycles-pp.get_stack_info
0.00 +0.1 0.07 ± 25% perf-profile.self.cycles-pp.__x2apic_send_IPI_dest
0.00 +0.1 0.07 ± 17% perf-profile.self.cycles-pp.resched_curr
0.00 +0.1 0.08 ± 12% perf-profile.self.cycles-pp.__list_add_valid
0.00 +0.1 0.08 ± 27% perf-profile.self.cycles-pp.__perf_event_overflow
0.00 +0.1 0.08 ± 31% perf-profile.self.cycles-pp.perf_trace_buf_update
0.00 +0.1 0.08 ± 9% perf-profile.self.cycles-pp.maybe_add_creds
0.00 +0.1 0.08 ± 14% perf-profile.self.cycles-pp.__x64_sys_read
0.00 +0.1 0.08 ± 11% perf-profile.self.cycles-pp.__x64_sys_write
0.00 +0.1 0.09 ± 21% perf-profile.self.cycles-pp.__fdget_pos
0.00 +0.1 0.09 ± 17% perf-profile.self.cycles-pp.kmalloc_reserve
0.00 +0.1 0.09 ± 18% perf-profile.self.cycles-pp.switch_mm_irqs_off
0.00 +0.1 0.09 ± 28% perf-profile.self.cycles-pp.__is_insn_slot_addr
0.00 +0.1 0.09 ± 15% perf-profile.self.cycles-pp.__wrgsbase_inactive
0.00 +0.1 0.09 ± 20% perf-profile.self.cycles-pp.skb_release_data
0.00 +0.1 0.09 ± 17% perf-profile.self.cycles-pp.sock_recvmsg
0.00 +0.1 0.10 ± 22% perf-profile.self.cycles-pp.perf_trace_buf_alloc
0.00 +0.1 0.10 ± 22% perf-profile.self.cycles-pp._find_next_bit
0.00 +0.1 0.10 ± 20% perf-profile.self.cycles-pp.set_next_entity
0.00 +0.1 0.10 ± 11% perf-profile.self.cycles-pp.__skb_datagram_iter
0.00 +0.1 0.10 ± 34% perf-profile.self.cycles-pp.in_gate_area_no_mm
0.00 +0.1 0.10 ± 7% perf-profile.self.cycles-pp.apparmor_socket_getpeersec_dgram
0.00 +0.1 0.10 ± 18% perf-profile.self.cycles-pp.check_preempt_curr
0.00 +0.1 0.10 ± 21% perf-profile.self.cycles-pp.__build_skb_around
0.00 +0.1 0.11 ± 17% perf-profile.self.cycles-pp.security_socket_recvmsg
0.00 +0.1 0.11 ± 17% perf-profile.self.cycles-pp.kmalloc_slab
0.00 +0.1 0.11 ± 8% perf-profile.self.cycles-pp.get_mem_cgroup_from_objcg
0.00 +0.1 0.11 ± 16% perf-profile.self.cycles-pp.wait_for_unix_gc
0.00 +0.1 0.11 ± 18% perf-profile.self.cycles-pp.__wake_up_common_lock
0.00 +0.1 0.11 ± 17% perf-profile.self.cycles-pp.schedule
0.00 +0.1 0.11 ± 11% perf-profile.self.cycles-pp.rb_insert_color
0.00 +0.1 0.12 ± 10% perf-profile.self.cycles-pp.alloc_skb_with_frags
0.00 +0.1 0.12 ± 14% perf-profile.self.cycles-pp.finish_task_switch
0.00 +0.1 0.12 ± 11% perf-profile.self.cycles-pp.syscall_exit_to_user_mode
0.00 +0.1 0.12 ± 19% perf-profile.self.cycles-pp.pick_next_entity
0.00 +0.1 0.12 ± 17% perf-profile.self.cycles-pp.perf_event_output_forward
0.00 +0.1 0.12 ± 17% perf-profile.self.cycles-pp.syscall_enter_from_user_mode
0.00 +0.1 0.12 ± 23% perf-profile.self.cycles-pp.perf_callchain
0.00 +0.1 0.12 ± 14% perf-profile.self.cycles-pp.perf_trace_sched_wakeup_template
0.00 +0.1 0.12 ± 10% perf-profile.self.cycles-pp.security_socket_getpeersec_dgram
0.00 +0.1 0.12 ± 21% perf-profile.self.cycles-pp.mem_cgroup_from_task
0.00 +0.1 0.13 ± 24% perf-profile.self.cycles-pp.propagate_protected_usage
0.00 +0.1 0.13 ± 26% perf-profile.self.cycles-pp.__perf_event_header__init_id
0.00 +0.1 0.13 ± 14% perf-profile.self.cycles-pp.skb_copy_datagram_from_iter
0.00 +0.1 0.13 ± 14% perf-profile.self.cycles-pp.ksys_read
0.00 +0.1 0.13 ± 19% perf-profile.self.cycles-pp.dequeue_entity
0.00 +0.1 0.14 ± 19% perf-profile.self.cycles-pp.set_next_buddy
0.00 +0.1 0.14 ± 22% perf-profile.self.cycles-pp.unix_destruct_scm
0.00 +0.1 0.14 ± 20% perf-profile.self.cycles-pp.schedule_timeout
0.00 +0.1 0.14 ± 27% perf-profile.self.cycles-pp.rb_erase
0.00 +0.1 0.14 ± 18% perf-profile.self.cycles-pp.check_stack_object
0.00 +0.1 0.14 ± 20% perf-profile.self.cycles-pp.tracing_gen_ctx_irq_test
0.00 +0.1 0.15 ± 29% perf-profile.self.cycles-pp.get_callchain_entry
0.00 +0.2 0.15 ± 16% perf-profile.self.cycles-pp.ksys_write
0.00 +0.2 0.15 ± 14% perf-profile.self.cycles-pp.switch_fpu_return
0.00 +0.2 0.16 ± 16% perf-profile.self.cycles-pp.___perf_sw_event
0.00 +0.2 0.16 ± 18% perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack
0.00 +0.2 0.16 ± 9% perf-profile.self.cycles-pp.sock_sendmsg
0.00 +0.2 0.17 ± 5% perf-profile.self.cycles-pp.select_task_rq_fair
0.00 +0.2 0.17 ± 16% perf-profile.self.cycles-pp.security_file_permission
0.00 +0.2 0.17 ± 51% perf-profile.self.cycles-pp.bpf_ksym_find
0.00 +0.2 0.17 ± 12% perf-profile.self.cycles-pp.prepare_to_wait
0.00 +0.2 0.18 ± 17% perf-profile.self.cycles-pp.__ksize
0.00 +0.2 0.18 ± 14% perf-profile.self.cycles-pp.refill_obj_stock
0.00 +0.2 0.18 ± 22% perf-profile.self.cycles-pp.dequeue_task_fair
0.00 +0.2 0.19 ± 18% perf-profile.self.cycles-pp.kvm_guest_state
0.00 +0.2 0.20 ± 35% perf-profile.self.cycles-pp.perf_output_sample
0.00 +0.2 0.20 ± 25% perf-profile.self.cycles-pp.perf_trace_sched_switch
0.00 +0.2 0.20 ± 27% perf-profile.self.cycles-pp.perf_callchain_user
0.00 +0.2 0.21 ± 16% perf-profile.self.cycles-pp.sock_alloc_send_pskb
0.00 +0.2 0.21 ± 7% perf-profile.self.cycles-pp.do_syscall_64
0.00 +0.2 0.21 ± 37% perf-profile.self.cycles-pp.perf_output_copy
0.00 +0.2 0.22 ± 14% perf-profile.self.cycles-pp.rcu_all_qs
0.00 +0.2 0.22 ± 12% perf-profile.self.cycles-pp.check_preempt_wakeup
0.00 +0.2 0.22 ± 11% perf-profile.self.cycles-pp.enqueue_task_fair
0.00 +0.2 0.22 ± 24% perf-profile.self.cycles-pp.__calc_delta
0.00 +0.2 0.22 ± 30% perf-profile.self.cycles-pp.perf_tp_event_match
0.00 +0.2 0.23 ± 17% perf-profile.self.cycles-pp.update_min_vruntime
0.00 +0.2 0.23 ± 23% perf-profile.self.cycles-pp.perf_trace_sched_stat_runtime
0.00 +0.2 0.23 ± 25% perf-profile.self.cycles-pp.obj_cgroup_charge
0.00 +0.2 0.24 ± 18% perf-profile.self.cycles-pp.select_idle_sibling
0.00 +0.2 0.24 ± 25% perf-profile.self.cycles-pp.save_fpregs_to_fpstate
0.00 +0.2 0.24 ± 22% perf-profile.self.cycles-pp.reweight_entity
0.00 +0.2 0.25 ± 18% perf-profile.self.cycles-pp.__switch_to
0.00 +0.3 0.26 ± 17% perf-profile.self.cycles-pp.consume_skb
0.00 +0.3 0.26 ± 19% perf-profile.self.cycles-pp.update_cfs_group
0.00 +0.3 0.26 ± 20% perf-profile.self.cycles-pp._raw_spin_unlock_irqrestore
0.00 +0.3 0.26 ± 14% perf-profile.self.cycles-pp.enqueue_entity
0.00 +0.3 0.27 ± 8% perf-profile.self.cycles-pp.unix_stream_recvmsg
0.00 +0.3 0.27 ± 21% perf-profile.self.cycles-pp.select_idle_cpu
0.00 +0.3 0.28 ± 10% perf-profile.self.cycles-pp.unix_write_space
0.00 +0.3 0.28 ± 13% perf-profile.self.cycles-pp._copy_to_iter
0.00 +0.3 0.29 ± 23% perf-profile.self.cycles-pp.get_perf_callchain
0.00 +0.3 0.30 ± 17% perf-profile.self.cycles-pp.pick_next_task_fair
0.00 +0.3 0.30 ± 15% perf-profile.self.cycles-pp._copy_from_iter
0.00 +0.3 0.31 ± 23% perf-profile.self.cycles-pp.cpuacct_charge
0.00 +0.3 0.32 ± 21% perf-profile.self.cycles-pp.native_sched_clock
0.00 +0.3 0.32 ± 17% perf-profile.self.cycles-pp.aa_file_perm
0.00 +0.3 0.32 ± 32% perf-profile.self.cycles-pp.perf_prepare_sample
0.00 +0.3 0.33 ± 24% perf-profile.self.cycles-pp.__kernel_text_address
0.00 +0.3 0.33 ± 15% perf-profile.self.cycles-pp.available_idle_cpu
0.00 +0.3 0.34 ± 21% perf-profile.self.cycles-pp.__update_load_avg_cfs_rq
0.00 +0.3 0.34 ± 24% perf-profile.self.cycles-pp.__mod_memcg_lruvec_state
0.00 +0.3 0.35 ± 25% perf-profile.self.cycles-pp.__task_pid_nr_ns
0.00 +0.4 0.36 ± 18% perf-profile.self.cycles-pp.update_rq_clock
0.00 +0.4 0.39 ± 24% perf-profile.self.cycles-pp.core_kernel_text
0.00 +0.4 0.39 ± 25% perf-profile.self.cycles-pp.ftrace_graph_ret_addr
0.00 +0.4 0.39 ± 7% perf-profile.self.cycles-pp.mutex_lock
0.00 +0.4 0.41 ± 23% perf-profile.self.cycles-pp.__update_load_avg_se
0.00 +0.4 0.41 ± 6% perf-profile.self.cycles-pp.new_sync_read
0.00 +0.4 0.42 ± 22% perf-profile.self.cycles-pp.perf_tp_event
0.00 +0.4 0.43 ± 15% perf-profile.self.cycles-pp.__switch_to_asm
0.00 +0.4 0.43 ± 28% perf-profile.self.cycles-pp.__enqueue_entity
0.00 +0.4 0.44 ± 16% perf-profile.self.cycles-pp.__pthread_enable_asynccancel
0.00 +0.4 0.44 ± 20% perf-profile.self.cycles-pp.restore_fpregs_from_fpstate
0.00 +0.4 0.44 ± 27% perf-profile.self.cycles-pp.perf_output_begin_forward
0.00 +0.5 0.46 ± 9% perf-profile.self.cycles-pp.update_load_avg
0.00 +0.5 0.46 ± 10% perf-profile.self.cycles-pp.try_to_wake_up
0.00 +0.5 0.46 ± 26% perf-profile.self.cycles-pp.unwind_get_return_address
0.00 +0.5 0.46 ± 13% perf-profile.self.cycles-pp.__might_sleep
0.00 +0.5 0.47 ± 10% perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
0.00 +0.5 0.48 ± 27% perf-profile.self.cycles-pp.__get_user_nocheck_8
0.00 +0.5 0.49 ± 13% perf-profile.self.cycles-pp.new_sync_write
0.00 +0.5 0.49 ± 19% perf-profile.self.cycles-pp.page_counter_charge
0.00 +0.5 0.50 ± 11% perf-profile.self.cycles-pp.__virt_addr_valid
0.00 +0.5 0.51 ± 14% perf-profile.self.cycles-pp.sock_read_iter
0.00 +0.5 0.52 ± 23% perf-profile.self.cycles-pp.___slab_alloc
0.00 +0.5 0.54 ± 18% perf-profile.self.cycles-pp.page_counter_cancel
0.00 +0.5 0.54 perf-profile.self.cycles-pp.vfs_read
0.00 +0.6 0.55 ± 17% perf-profile.self.cycles-pp.update_curr
0.00 +0.6 0.55 ± 18% perf-profile.self.cycles-pp.aa_sk_perm
0.00 +0.6 0.58 ± 13% perf-profile.self.cycles-pp.write
0.00 +0.6 0.59 ± 13% perf-profile.self.cycles-pp.read
0.00 +0.6 0.60 ± 12% perf-profile.self.cycles-pp.__pthread_disable_asynccancel
0.00 +0.6 0.63 ± 9% perf-profile.self.cycles-pp.sock_write_iter
0.00 +0.6 0.63 ± 25% perf-profile.self.cycles-pp.__unwind_start
0.00 +0.6 0.65 ± 36% perf-profile.self.cycles-pp.memcpy_erms
0.00 +0.7 0.69 ± 18% perf-profile.self.cycles-pp.mod_objcg_state
0.00 +0.7 0.70 ± 25% perf-profile.self.cycles-pp.kernel_text_address
0.00 +0.7 0.74 ± 14% perf-profile.self.cycles-pp.common_file_perm
0.00 +0.8 0.77 ± 16% perf-profile.self.cycles-pp.sock_def_readable
0.00 +0.8 0.78 ± 16% perf-profile.self.cycles-pp.__alloc_skb
0.00 +0.8 0.79 ± 14% perf-profile.self.cycles-pp.get_obj_cgroup_from_current
0.00 +0.8 0.82 ± 18% perf-profile.self.cycles-pp.__schedule
0.00 +0.9 0.85 ± 19% perf-profile.self.cycles-pp.skb_set_owner_w
0.00 +0.9 0.87 ± 16% perf-profile.self.cycles-pp.__check_object_size
0.00 +0.9 0.88 ± 15% perf-profile.self.cycles-pp.sock_wfree
0.00 +1.0 0.97 ± 17% perf-profile.self.cycles-pp.kmem_cache_alloc_node
0.00 +1.0 1.03 ± 15% perf-profile.self.cycles-pp.vfs_write
0.00 +1.2 1.16 ± 13% perf-profile.self.cycles-pp.__kmalloc_node_track_caller
0.00 +1.2 1.18 ± 13% perf-profile.self.cycles-pp.syscall_return_via_sysret
0.00 +1.3 1.28 ± 26% perf-profile.self.cycles-pp.stack_access_ok
0.00 +1.4 1.37 ± 23% perf-profile.self.cycles-pp.perf_callchain_kernel
0.00 +1.4 1.38 ± 14% perf-profile.self.cycles-pp.unix_stream_sendmsg
0.00 +1.6 1.58 ± 12% perf-profile.self.cycles-pp.unix_stream_read_generic
0.00 +1.9 1.94 ± 8% perf-profile.self.cycles-pp._raw_spin_lock_irqsave
0.00 +2.1 2.10 ± 13% perf-profile.self.cycles-pp.__slab_free
0.00 +2.2 2.19 ± 15% perf-profile.self.cycles-pp.__fget_light
0.00 +2.2 2.24 ± 24% perf-profile.self.cycles-pp.orc_find
0.00 +3.7 3.69 ± 23% perf-profile.self.cycles-pp.__orc_find
0.43 ±223% +6.1 6.55 ± 25% perf-profile.self.cycles-pp.unwind_next_frame
0.00 +13.4 13.45 ± 10% perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath
Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
--
0-DAY CI Kernel Test Service
https://01.org/lkp
It's been 3 months since I wrote these patches, so memory is vague at
best :/
On Tue, Mar 29, 2022 at 11:03:44AM +0200, Vincent Guittot wrote:
> > +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> > +{
> > + if (!sched_feat(PLACE_MIGRATE))
> > + return;
> > +
> > + if (cfs_rq->nr_running < se->migrated) {
> > + /*
> > + * Migrated to a shorter runqueue, go first because
> > + * we were under-served on the old runqueue.
> > + */
> > + se->vruntime = cfs_rq->min_vruntime;
> > + return;
> > + }
> > +
> > + /*
> > + * Migrated to a longer runqueue, go last because
> > + * we got over-served on the old runqueue.
> > + */
> > + se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> > +}
> > +
> > static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
> >
> > static inline bool cfs_bandwidth_used(void);
> > @@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> >
> > if (flags & ENQUEUE_WAKEUP)
> > place_entity(cfs_rq, se, 0);
> > + else if (se->migrated)
> > + place_entity_migrate(cfs_rq, se);
> >
> > check_schedstat_required();
> > update_stats_enqueue_fair(cfs_rq, se, flags);
> > @@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> > * wakee task is less decayed, but giving the wakee more load
> > * sounds not bad.
> > */
> > - remove_entity_load_avg(&p->se);
> > + remove_entity_load_avg(se);
> > }
> >
> > /* Tell new CPU we are migrated */
> > - p->se.avg.last_update_time = 0;
> > + se->avg.last_update_time = 0;
> >
> > /* We have migrated, no longer consider this task hot */
> > - p->se.migrated = 1;
> > + for_each_sched_entity(se)
> > + se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
>
> > Why do we need to loop on se? Isn't p->se enough?
Yeah; I really don't recall why I did that. And looking at it now, it
doesn't really make much sense. I suppose it will trigger
place_entity_migrate() for the group entries, but on the old CPU.
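[Editor's note: to make the placement policy of place_entity_migrate()
quoted above concrete, here is a hypothetical userspace rendering with
made-up numbers. The struct layouts and the fixed vslice value are
stand-ins for the kernel's, not the real definitions.]

#include <stdio.h>

/* Minimal stand-ins for the kernel structures used by the patch. */
struct cfs_rq {
	unsigned int nr_running;
	unsigned long long min_vruntime;
};

struct sched_entity {
	unsigned int migrated;		/* old queue length (+1 if !on_rq) */
	unsigned long long vruntime;
};

/* Stand-in for sched_vslice(): one ideal slice of vruntime, in ns. */
static const unsigned long long vslice = 3000000ULL;

static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if (cfs_rq->nr_running < se->migrated) {
		/* Shorter queue than the one we left: go first. */
		se->vruntime = cfs_rq->min_vruntime;
		return;
	}
	/* Longer (or equal) queue: go last. */
	se->vruntime = cfs_rq->min_vruntime + vslice;
}

int main(void)
{
	struct cfs_rq dst = { .nr_running = 2, .min_vruntime = 1000000ULL };
	struct sched_entity se = { .migrated = 5 };	/* left a 5-deep queue */

	place_entity_migrate(&dst, &se);
	printf("placed at vruntime %llu\n", se.vruntime);	/* 1000000: front */
	return 0;
}

[With a 5-deep source queue and a 2-deep destination the task counts as
under-served and is placed at min_vruntime; swap the queue lengths and
it lands a full vslice behind the leftmost task instead.]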
On Thu, 31 Mar 2022 at 13:47, Peter Zijlstra <[email protected]> wrote:
>
>
> It's been 3 months since I wrote these patches, so memory is vague at
> best :/
>
> On Tue, Mar 29, 2022 at 11:03:44AM +0200, Vincent Guittot wrote:
>
> > > +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> > > +{
> > > + if (!sched_feat(PLACE_MIGRATE))
> > > + return;
> > > +
> > > + if (cfs_rq->nr_running < se->migrated) {
> > > + /*
> > > + * Migrated to a shorter runqueue, go first because
> > > + * we were under-served on the old runqueue.
> > > + */
> > > + se->vruntime = cfs_rq->min_vruntime;
> > > + return;
> > > + }
> > > +
> > > + /*
> > > + * Migrated to a longer runqueue, go last because
> > > + * we got over-served on the old runqueue.
> > > + */
> > > + se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> > > +}
> > > +
> > > static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
> > >
> > > static inline bool cfs_bandwidth_used(void);
> > > @@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> > >
> > > if (flags & ENQUEUE_WAKEUP)
> > > place_entity(cfs_rq, se, 0);
> > > + else if (se->migrated)
> > > + place_entity_migrate(cfs_rq, se);
> > >
> > > check_schedstat_required();
> > > update_stats_enqueue_fair(cfs_rq, se, flags);
>
> > > @@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
> > > * wakee task is less decayed, but giving the wakee more load
> > > * sounds not bad.
> > > */
> > > - remove_entity_load_avg(&p->se);
> > > + remove_entity_load_avg(se);
> > > }
> > >
> > > /* Tell new CPU we are migrated */
> > > - p->se.avg.last_update_time = 0;
> > > + se->avg.last_update_time = 0;
> > >
> > > /* We have migrated, no longer consider this task hot */
> > > - p->se.migrated = 1;
> > > + for_each_sched_entity(se)
> > > + se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
> >
> > Why do we need to loop on se? Isn't p->se enough?
>
> Yeah; I really don't recall why I did that. And looking at it now, it
> doesn't really make much sense. I suppose it will trigger
> place_entity_migrate() for the group entries, but on the old CPU.
Yes, that is the reason for my question. The task is still attached to
the previous cfs_rq, so you trigger place_entity_migrate() on the
sched_entities of the previous hierarchy. Also, this will only happen
during enqueue if the se has not run in the meantime. So it's probably
a nop most of the time, since the parent cfs_rq is already enqueued
when we migrate a runnable task; in the other case it will break
sched_group vruntime and fairness.
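[Editor's note: a sketch of the single-entity variant this exchange
points toward, against the migrate_task_rq_fair() hunk quoted above.
Untested and hypothetical; the thread did not settle on a fix.]

	/*
	 * Sketch: record the old queue length only for the task's own
	 * entity instead of walking the group hierarchy, so that
	 * place_entity_migrate() only fires for the task itself on the
	 * new CPU.
	 */
	struct sched_entity *se = &p->se;

	/* Tell new CPU we are migrated */
	se->avg.last_update_time = 0;

	/* We have migrated, no longer consider this task hot */
	se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;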