I'm currently working on converting s390 to use the generic entry
functionality. So far things are straightforward; there's only one
slight problem. There is a syscall_enter_from_user_mode() which sets
lockdep state and other initial stuff + does the entry work at the same
time. This is a problem on s390 because the way we restart syscalls isn't
as easy as on x86.
My understanding is that on x86 syscalls are restarted by just rewinding
the program counter and returning to user space, so the instruction causing
the syscall gets executed again.
On s390 this doesn't work, because the syscall number might be hard coded
into the 'svc' instruction, so when the syscall number has to be changed we
would repeat the wrong (old) syscall.
So we would need functions that only do the stuff that is required when switching
from user space to kernel and back, and functions which do the system call tracing
and work which might be called repeatedly.
With the attached patch, the s390 code now looks like this:
(I removed some s390-specific stuff here to make the function easier
to read)
__do_syscall is the function which gets called by low level entry.S code:
void noinstr __do_syscall(struct pt_regs *regs)
{
enter_from_user_mode(regs); /* sets lockdep state, and other initial stuff */
/*
* functions that need to run with irqs disabled,
* but lockdep state and other stuff set up
*/
memcpy(&regs->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long));
memcpy(&regs->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code));
regs->psw = S390_lowcore.svc_old_psw;
update_timer_sys();
local_irq_enable();
regs->orig_gpr2 = regs->gprs[2];
do {
regs->flags = _PIF_SYSCALL;
do_syscall(regs);
} while (test_pt_regs_flag(regs, PIF_SYSCALL_RESTART));
exit_to_user_mode();
}
__do_syscall calls do_syscall which does all the syscall work, and this might
be called more than once if PIF_SYSCALL_RESTART is set:
void do_syscall(struct pt_regs *regs)
{
unsigned long nr = regs->int_code & 0xffff;
nr = syscall_enter_from_user_mode_work(regs, nr);
regs->gprs[2] = -ENOSYS;
if (likely(nr < NR_syscalls)) {
regs->gprs[2] = current->thread.sys_call_table[nr](
regs->orig_gpr2, regs->gprs[3],
regs->gprs[4], regs->gprs[5],
regs->gprs[6], regs->gprs[7]);
}
syscall_exit_to_user_mode1(regs);
}
What do you think about the attached patch? I'm also open to suggestions for a
proper name for syscall_exit_to_user_mode1() ;-)
On s390, we can not call one function which sets lockdep
state and do the syscall work at the same time. There add
make enter_from_user_mode() and exit_to_user_mode() public, and
add syscall_exit_to_user_mode1() which does the same as
syscall_exit_to_user_mode() but skips the final exit_to_user_mode().
Signed-off-by: Sven Schnelle <[email protected]>
---
include/linux/entry-common.h | 4 +++-
kernel/entry/common.c | 35 +++++++++++++++++++++++++++--------
2 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 474f29638d2c..496c9a47eab4 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -124,7 +124,7 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
* to be done between establishing state and handling user mode entry work.
*/
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
-
+void enter_from_user_mode(struct pt_regs *regs);
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
* a syscall
@@ -311,6 +311,8 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
* arch_exit_to_user_mode() to handle e.g. speculation mitigations
*/
void syscall_exit_to_user_mode(struct pt_regs *regs);
+void syscall_exit_to_user_mode1(struct pt_regs *regs);
+void exit_to_user_mode(void);
/**
* irqentry_enter_from_user_mode - Establish state before invoking the irq handler
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index e9e2df3f3f9e..3ad462ebfa15 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -18,7 +18,7 @@
* 2) Invoke context tracking if enabled to reactivate RCU
* 3) Trace interrupts off state
*/
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
+static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
arch_check_user_regs(regs);
lockdep_hardirqs_off(CALLER_ADDR0);
@@ -31,6 +31,11 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
instrumentation_end();
}
+void noinstr enter_from_user_mode(struct pt_regs *regs)
+{
+ __enter_from_user_mode(regs);
+}
+
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
if (unlikely(audit_context())) {
@@ -92,7 +97,7 @@ noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
long ret;
- enter_from_user_mode(regs);
+ __enter_from_user_mode(regs);
instrumentation_begin();
local_irq_enable();
@@ -104,14 +109,14 @@ noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ __enter_from_user_mode(regs);
instrumentation_begin();
local_irq_enable();
instrumentation_end();
}
/**
- * exit_to_user_mode - Fixup state when exiting to user mode
+ * __exit_to_user_mode - Fixup state when exiting to user mode
*
* Syscall/interupt exit enables interrupts, but the kernel state is
* interrupts disabled when this is invoked. Also tell RCU about it.
@@ -122,7 +127,7 @@ noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
* mitigations, etc.
* 4) Tell lockdep that interrupts are enabled
*/
-static __always_inline void exit_to_user_mode(void)
+static __always_inline void __exit_to_user_mode(void)
{
instrumentation_begin();
trace_hardirqs_on_prepare();
@@ -134,6 +139,11 @@ static __always_inline void exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
+void noinstr exit_to_user_mode(void)
+{
+ __exit_to_user_mode();
+}
+
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal(struct pt_regs *regs) { }
@@ -265,12 +275,21 @@ __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
local_irq_disable_exit_to_user();
exit_to_user_mode_prepare(regs);
instrumentation_end();
- exit_to_user_mode();
+ __exit_to_user_mode();
+}
+
+__visible noinstr void syscall_exit_to_user_mode1(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ syscall_exit_to_user_mode_prepare(regs);
+ local_irq_disable_exit_to_user();
+ exit_to_user_mode_prepare(regs);
+ instrumentation_end();
}
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ __enter_from_user_mode(regs);
}
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
@@ -278,7 +297,7 @@ noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
instrumentation_begin();
exit_to_user_mode_prepare(regs);
instrumentation_end();
- exit_to_user_mode();
+ __exit_to_user_mode();
}
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
--
2.17.1
Sven,
On Tue, Dec 01 2020 at 09:35, Sven Schnelle wrote:
> On s390, we can not call one function which sets lockdep
> state and do the syscall work at the same time. There add
> make enter_from_user_mode() and exit_to_user_mode() public, and
> add syscall_exit_to_user_mode1() which does the same as
> syscall_exit_to_user_mode() but skips the final exit_to_user_mode().
the explanation in the "cover letter" made at least sense, but the above
is unparseable word salad.
> Signed-off-by: Sven Schnelle <[email protected]>
> ---
> include/linux/entry-common.h | 4 +++-
> kernel/entry/common.c | 35 +++++++++++++++++++++++++++--------
> 2 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
> index 474f29638d2c..496c9a47eab4 100644
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -124,7 +124,7 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
> * to be done between establishing state and handling user mode entry work.
> */
> void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
> -
> +void enter_from_user_mode(struct pt_regs *regs);
You might have noticed, that all of these function prototypes have
proper kernel documentation. So just glueing this on to the previous
prototype does not cut it. enter_from/exit_to_user_mode() want to go
together into a separate section.
> /**
> * syscall_enter_from_user_mode_work - Check and handle work before invoking
> * a syscall
> @@ -311,6 +311,8 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
> * arch_exit_to_user_mode() to handle e.g. speculation mitigations
> */
> void syscall_exit_to_user_mode(struct pt_regs *regs);
> +void syscall_exit_to_user_mode1(struct pt_regs *regs);
Same here and as you mentioned ...mode1() is a pretty horrible name.
syscall_exit_to_user_mode_work() perhaps?
> +void exit_to_user_mode(void);
>
> /**
> * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index e9e2df3f3f9e..3ad462ebfa15 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -18,7 +18,7 @@
> * 2) Invoke context tracking if enabled to reactivate RCU
> * 3) Trace interrupts off state
> */
> -static __always_inline void enter_from_user_mode(struct pt_regs *regs)
> +static __always_inline void __enter_from_user_mode(struct pt_regs
> *regs)
Can you please split the renaming into a separate preparatory patch?
> +__visible noinstr void syscall_exit_to_user_mode1(struct pt_regs *regs)
What's the point of marking this function noinstr? Everything it does is
instrumentable.
Thanks,
tglx
Greetings,
FYI, we noticed a -1.9% regression of unixbench.score due to commit:
commit: a358d5636cfcb8b278c766ac261ad99143215041 ("[PATCH] entry: split lockdep and syscall work functions")
url: https://github.com/0day-ci/linux/commits/Sven-Schnelle/entry-split-lockdep-and-syscall-work-functions/20201201-163931
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 09162bc32c880a791c6c0668ce0745cf7958f576
in testcase: will-it-scale
on test machine: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory
with following parameters:
nr_task: 16
mode: process
test: futex3
cpufreq_governor: performance
ucode: 0x5003003
test-description: Will It Scale takes a testcase and runs it from 1 through to n parallel copies to see if the testcase will scale. It builds both a process and threads based test in order to see any differences between the two.
test-url: https://github.com/antonblanchard/will-it-scale
In addition to that, the commit also has significant impact on the following tests:
If you fix the issue, kindly add the following tag
Reported-by: kernel test robot <[email protected]>
Details are as below:
-------------------------------------------------------------------------------------------------->
To reproduce:
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml
=========================================================================================
compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase/ucode:
gcc-9/performance/x86_64-rhel-8.3/process/16/debian-10.4-x86_64-20200603.cgz/lkp-csl-2ap2/futex3/will-it-scale/0x5003003
commit:
v5.10-rc4
a358d5636c ("entry: split lockdep and syscall work functions")
v5.10-rc4 a358d5636cfcb8b278c766ac261
---------------- ---------------------------
fail:runs %reproduction fail:runs
| | |
:7 129% 9:9 stderr.Events_disabled
:7 129% 9:9 stderr.Events_enabled
:7 129% 9:9 stderr.[perf_record:Captured_and_wrote#MB/tmp/lkp/perf-sched.data(#samples)]
:7 129% 9:9 stderr.[perf_record:Woken_up#times_to_write_data]
:7 129% 9:9 stderr.has_stderr
%stddev %change %stddev
\ | \
5516 ? 4% -10.3% 4947 ? 4% boot-time.idle
291515 ? 68% -41.2% 171377 ? 4% cpuidle.POLL.time
22225 ? 62% +280.3% 84528 ? 6% numa-numastat.node1.other_node
4076 ?101% -100.0% 0.00 softirqs.CPU14.NET_RX
10586 +14.2% 12085 slabinfo.kmalloc-2k.active_objs
10704 ? 2% +14.5% 12252 slabinfo.kmalloc-2k.num_objs
4098 ? 15% +203.3% 12431 ? 5% numa-vmstat.node3.nr_active_anon
76843 ? 6% +15.0% 88374 ? 5% numa-vmstat.node3.nr_file_pages
4679 ? 23% +183.0% 13240 ? 6% numa-vmstat.node3.nr_shmem
4098 ? 15% +203.3% 12431 ? 5% numa-vmstat.node3.nr_zone_active_anon
16418 ? 15% +202.6% 49674 ? 5% numa-meminfo.node3.Active
16418 ? 15% +202.6% 49674 ? 5% numa-meminfo.node3.Active(anon)
307349 ? 6% +15.0% 353508 ? 5% numa-meminfo.node3.FilePages
697704 ? 20% +32.9% 927015 ? 13% numa-meminfo.node3.MemUsed
18690 ? 23% +183.4% 52973 ? 6% numa-meminfo.node3.Shmem
19935 ? 8% +169.7% 53758 ? 3% meminfo.Active
19935 ? 8% +169.7% 53758 ? 3% meminfo.Active(anon)
470080 +9.8% 516343 meminfo.Committed_AS
2798824 +15.1% 3220140 meminfo.Memused
31425 ? 5% +109.3% 65771 ? 3% meminfo.Shmem
11733 +27.0% 14898 meminfo.max_used_kB
4977 ? 8% +169.7% 13426 ? 3% proc-vmstat.nr_active_anon
67216 +4.0% 69896 proc-vmstat.nr_anon_pages
293542 +5.6% 310056 proc-vmstat.nr_file_pages
69999 +3.9% 72758 proc-vmstat.nr_inactive_anon
7851 ? 5% +109.3% 16436 ? 3% proc-vmstat.nr_shmem
69914 +2.5% 71683 proc-vmstat.nr_slab_unreclaimable
285726 +2.8% 293640 proc-vmstat.nr_unevictable
4977 ? 8% +169.7% 13426 ? 3% proc-vmstat.nr_zone_active_anon
69999 +3.9% 72758 proc-vmstat.nr_zone_inactive_anon
285726 +2.8% 293640 proc-vmstat.nr_zone_unevictable
989357 +25.2% 1238643 proc-vmstat.numa_hit
896081 +9.2% 978823 proc-vmstat.numa_local
93275 +178.6% 259820 proc-vmstat.numa_other
5379 ? 15% +229.0% 17697 ? 4% proc-vmstat.pgactivate
1060503 +24.6% 1321886 proc-vmstat.pgalloc_normal
1160918 +2.7% 1192221 proc-vmstat.pgfault
1177873 +20.8% 1422376 proc-vmstat.pgfree
9.075e+09 ? 36% +25.9% 1.142e+10 perf-stat.i.branch-instructions
5471478 ? 11% -20.5% 4349395 ? 6% perf-stat.i.cache-misses
1.22 ? 55% -36.0% 0.78 perf-stat.i.cpi
198.19 +1.8% 201.74 perf-stat.i.cpu-migrations
10702 ? 12% +24.4% 13314 ? 5% perf-stat.i.cycles-between-cache-misses
1.488e+10 ? 36% +24.5% 1.853e+10 perf-stat.i.dTLB-loads
1.124e+10 ? 36% +25.2% 1.408e+10 perf-stat.i.dTLB-stores
1.03 ? 36% +25.3% 1.29 perf-stat.i.ipc
0.73 ? 12% +95.6% 1.44 ? 12% perf-stat.i.major-faults
183.61 ? 36% +25.0% 229.60 perf-stat.i.metric.M/sec
3733 +2.4% 3822 perf-stat.i.minor-faults
3734 +2.4% 3823 perf-stat.i.page-faults
1.22 ? 55% -36.2% 0.78 perf-stat.overall.cpi
10582 ? 11% +23.5% 13066 ? 6% perf-stat.overall.cycles-between-cache-misses
1.03 ? 36% +25.3% 1.29 perf-stat.overall.ipc
9.044e+09 ? 36% +25.9% 1.138e+10 perf-stat.ps.branch-instructions
5452944 ? 11% -20.5% 4334998 ? 6% perf-stat.ps.cache-misses
197.71 +1.7% 201.11 perf-stat.ps.cpu-migrations
1.483e+10 ? 36% +24.5% 1.847e+10 perf-stat.ps.dTLB-loads
1.12e+10 ? 36% +25.2% 1.403e+10 perf-stat.ps.dTLB-stores
0.73 ? 12% +95.9% 1.43 ? 12% perf-stat.ps.major-faults
3720 +2.4% 3809 perf-stat.ps.minor-faults
3721 +2.4% 3810 perf-stat.ps.page-faults
21770 ? 23% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.avg
110841 ? 11% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.max
18.36 ? 18% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.min
33023 ? 14% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.stddev
0.03 ? 78% -100.0% 0.00 sched_debug.cfs_rq:/.nr_spread_over.avg
1.34 ? 28% -100.0% 0.00 sched_debug.cfs_rq:/.nr_spread_over.max
0.17 ? 49% -100.0% 0.00 sched_debug.cfs_rq:/.nr_spread_over.stddev
1670 ? 11% -100.0% 0.00 sched_debug.cpu.sched_count.avg
67230 ? 12% -100.0% 0.00 sched_debug.cpu.sched_count.max
512.85 ? 9% -100.0% 0.00 sched_debug.cpu.sched_count.min
5175 ? 10% -100.0% 0.00 sched_debug.cpu.sched_count.stddev
755.90 ? 11% -100.0% 0.00 sched_debug.cpu.sched_goidle.avg
33544 ? 12% -100.0% 0.00 sched_debug.cpu.sched_goidle.max
70.86 ? 29% -100.0% 0.00 sched_debug.cpu.sched_goidle.min
2591 ? 10% -100.0% 0.00 sched_debug.cpu.sched_goidle.stddev
734.02 ? 12% -100.0% 0.00 sched_debug.cpu.ttwu_count.avg
33982 ? 13% -100.0% 0.00 sched_debug.cpu.ttwu_count.max
181.24 ? 9% -100.0% 0.00 sched_debug.cpu.ttwu_count.min
2616 ? 10% -100.0% 0.00 sched_debug.cpu.ttwu_count.stddev
308.36 ? 15% -100.0% 0.00 sched_debug.cpu.ttwu_local.avg
3731 ?105% -100.0% 0.00 sched_debug.cpu.ttwu_local.max
176.38 ? 10% -100.0% 0.00 sched_debug.cpu.ttwu_local.min
371.08 ? 77% -100.0% 0.00 sched_debug.cpu.ttwu_local.stddev
0.00 +0.6 0.62 ? 7% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode_prepare.syscall_exit_to_user_mode.entry_SYSCALL_64_after_hwframe.syscall
0.00 +0.1 0.06 ? 18% perf-profile.children.cycles-pp.unwind_next_frame
0.00 +0.1 0.09 ? 13% perf-profile.children.cycles-pp.perf_callchain_kernel
0.00 +0.1 0.10 ? 13% perf-profile.children.cycles-pp.get_perf_callchain
0.00 +0.1 0.10 ? 13% perf-profile.children.cycles-pp.perf_callchain
0.00 +0.1 0.11 ? 12% perf-profile.children.cycles-pp.perf_prepare_sample
0.00 +0.1 0.11 ? 23% perf-profile.children.cycles-pp.process_simple
0.00 +0.1 0.11 ? 25% perf-profile.children.cycles-pp.perf_session__process_events
0.00 +0.1 0.11 ? 25% perf-profile.children.cycles-pp.record__finish_output
0.21 ? 39% +0.1 0.33 ? 8% perf-profile.children.cycles-pp.syscall@plt
0.00 +0.1 0.12 ? 13% perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime
0.00 +0.1 0.12 ? 12% perf-profile.children.cycles-pp.perf_event_output_forward
0.00 +0.1 0.13 ? 12% perf-profile.children.cycles-pp.perf_swevent_overflow
0.00 +0.1 0.13 ? 12% perf-profile.children.cycles-pp.__perf_event_overflow
0.00 +0.1 0.13 ? 14% perf-profile.children.cycles-pp.update_curr
0.00 +0.1 0.13 ? 12% perf-profile.children.cycles-pp.perf_tp_event
0.00 +0.1 0.14 ? 24% perf-profile.children.cycles-pp.cmd_sched
0.00 +0.1 0.14 ? 24% perf-profile.children.cycles-pp.cmd_record
0.21 ? 27% +0.1 0.36 ? 17% perf-profile.children.cycles-pp.scheduler_tick
0.00 +0.1 0.15 ? 12% perf-profile.children.cycles-pp.task_tick_fair
0.00 +0.2 0.16 ? 21% perf-profile.children.cycles-pp.__libc_start_main
0.00 +0.2 0.16 ? 21% perf-profile.children.cycles-pp.main
0.00 +0.2 0.16 ? 21% perf-profile.children.cycles-pp.run_builtin
0.00 +0.6 0.62 ? 7% perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare
12.66 ?140% -11.4 1.22 ? 8% perf-profile.self.cycles-pp.syscall_exit_to_user_mode
0.00 +0.6 0.62 ? 7% perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare
8798 ?204% -100.0% 0.00 interrupts.161:PCI-MSI.524289-edge.eth0-TxRx-0
2564 ?100% -100.0% 0.00 interrupts.162:PCI-MSI.524290-edge.eth0-TxRx-1
10781 ?172% -100.0% 0.00 interrupts.163:PCI-MSI.524291-edge.eth0-TxRx-2
7838 ?101% -100.0% 0.00 interrupts.164:PCI-MSI.524292-edge.eth0-TxRx-3
34.71 ? 31% +448.9% 190.56 ? 68% interrupts.CPU10.RES:Rescheduling_interrupts
8798 ?204% -100.0% 0.00 interrupts.CPU11.161:PCI-MSI.524289-edge.eth0-TxRx-0
2156 ? 21% -28.4% 1544 ? 20% interrupts.CPU115.CAL:Function_call_interrupts
2090 ? 17% -25.8% 1551 ? 20% interrupts.CPU116.CAL:Function_call_interrupts
2564 ?100% -100.0% 0.00 interrupts.CPU12.162:PCI-MSI.524290-edge.eth0-TxRx-1
2330 ? 40% -31.8% 1588 ? 18% interrupts.CPU123.CAL:Function_call_interrupts
2014 ? 4% -21.2% 1587 ? 19% interrupts.CPU124.CAL:Function_call_interrupts
2689 ? 42% -40.8% 1593 ? 16% interrupts.CPU125.CAL:Function_call_interrupts
2214 ? 27% -29.9% 1552 ? 19% interrupts.CPU126.CAL:Function_call_interrupts
10781 ?172% -100.0% 0.00 interrupts.CPU13.163:PCI-MSI.524291-edge.eth0-TxRx-2
7838 ?101% -100.0% 0.00 interrupts.CPU14.164:PCI-MSI.524292-edge.eth0-TxRx-3
1962 ? 9% -16.7% 1634 ? 13% interrupts.CPU144.CAL:Function_call_interrupts
1913 ? 5% -17.8% 1572 ? 19% interrupts.CPU146.CAL:Function_call_interrupts
1877 ? 3% -17.7% 1545 ? 20% interrupts.CPU151.CAL:Function_call_interrupts
2622 ? 69% -40.6% 1558 ? 20% interrupts.CPU165.CAL:Function_call_interrupts
118.43 ? 29% +332.2% 511.89 ? 24% interrupts.CPU191.NMI:Non-maskable_interrupts
118.43 ? 29% +332.2% 511.89 ? 24% interrupts.CPU191.PMI:Performance_monitoring_interrupts
2625 ? 70% -40.7% 1556 ? 19% interrupts.CPU41.CAL:Function_call_interrupts
1903 ? 4% -18.8% 1544 ? 20% interrupts.CPU45.CAL:Function_call_interrupts
1955 ? 6% -17.9% 1606 ? 20% interrupts.CPU49.CAL:Function_call_interrupts
2521 ? 40% -38.1% 1560 ? 20% interrupts.CPU75.CAL:Function_call_interrupts
147.43 ? 9% +121.8% 327.00 ? 22% interrupts.CPU95.NMI:Non-maskable_interrupts
147.43 ? 9% +121.8% 327.00 ? 22% interrupts.CPU95.PMI:Performance_monitoring_interrupts
6669 ? 35% -39.2% 4053 ? 17% interrupts.RES:Rescheduling_interrupts
will-it-scale.per_process_ops
1.04e+07 +---------------------------------------------------------------+
| +..+ +.. |
1.035e+07 |..+..+.+.. .. + .. |
| + + +..+.. |
| + +.. +.. +..|
1.03e+07 |-+ + .. .+.. .. +.. .. |
| + +. +.+ +.+ |
1.025e+07 |-+ |
| O O O O O |
1.02e+07 |-+ O O O O O O O O O O O |
| O O O O O |
| |
1.015e+07 |-+ |
| O |
1.01e+07 +---------------------------------------------------------------+
[*] bisect-good sample
[O] bisect-bad sample
***************************************************************************************************
lkp-csl-2sp4: 96 threads Intel(R) Xeon(R) CPU @ 2.30GHz with 128G memory
=========================================================================================
compiler/cpufreq_governor/kconfig/nr_task/rootfs/runtime/tbox_group/test/testcase/ucode:
gcc-9/performance/x86_64-rhel-8.3/1/debian-10.4-x86_64-20200603.cgz/300s/lkp-csl-2sp4/syscall/unixbench/0x4003003
commit:
v5.10-rc4
a358d5636c ("entry: split lockdep and syscall work functions")
v5.10-rc4 a358d5636cfcb8b278c766ac261
---------------- ---------------------------
fail:runs %reproduction fail:runs
| | |
:9 44% 4:4 stderr.Events_disabled
:9 44% 4:4 stderr.Events_enabled
:9 44% 4:4 stderr.[perf_record:Captured_and_wrote#MB/tmp/lkp/perf-sched.data(#samples)]
:9 44% 4:4 stderr.[perf_record:Woken_up#times_to_write_data]
:9 44% 4:4 stderr.has_stderr
1:9 -10% 0:4 perf-profile.children.cycles-pp.error_entry
1:9 -9% 0:4 perf-profile.self.cycles-pp.error_entry
%stddev %change %stddev
\ | \
1506 -1.9% 1478 unixbench.score
54357 -7.7% 50172 unixbench.time.minor_page_faults
92.23 +1.7% 93.76 unixbench.time.user_time
8.815e+08 -1.8% 8.654e+08 unixbench.workload
21.47 +3.1% 22.13 boot-time.boot
32298 ? 41% -37.1% 20310 ? 7% softirqs.CPU62.RCU
6703 ? 3% +11.1% 7448 slabinfo.kmalloc-2k.active_objs
6757 ? 3% +12.0% 7567 slabinfo.kmalloc-2k.num_objs
837.78 ? 11% +27.9% 1071 ? 5% slabinfo.mnt_cache.active_objs
837.78 ? 11% +27.9% 1071 ? 5% slabinfo.mnt_cache.num_objs
3003 ? 52% +196.2% 8895 ? 7% numa-meminfo.node1.Active
2964 ? 53% +198.2% 8839 ? 7% numa-meminfo.node1.Active(anon)
450792 ? 6% +11.8% 503916 ? 9% numa-meminfo.node1.FilePages
8176 ? 40% +87.8% 15358 ? 20% numa-meminfo.node1.Shmem
442607 ? 6% +10.4% 488571 ? 9% numa-meminfo.node1.Unevictable
6811 ? 3% +75.3% 11943 ? 5% meminfo.Active
6545 ? 4% +71.0% 11193 ? 5% meminfo.Active(anon)
265.89 ?107% +181.6% 748.75 ? 74% meminfo.Active(file)
2086104 +12.7% 2350606 meminfo.Memused
16185 +28.7% 20830 ? 3% meminfo.Shmem
5936 ? 2% +21.3% 7199 ? 2% meminfo.max_used_kB
740.67 ? 53% +198.6% 2211 ? 8% numa-vmstat.node1.nr_active_anon
112698 ? 6% +11.8% 125979 ? 9% numa-vmstat.node1.nr_file_pages
2044 ? 40% +87.9% 3840 ? 20% numa-vmstat.node1.nr_shmem
110651 ? 6% +10.4% 122142 ? 9% numa-vmstat.node1.nr_unevictable
740.67 ? 53% +198.6% 2211 ? 8% numa-vmstat.node1.nr_zone_active_anon
110651 ? 6% +10.4% 122142 ? 9% numa-vmstat.node1.nr_zone_unevictable
1770566 ? 9% -26.4% 1303772 ? 23% perf-stat.i.cache-misses
4073 ? 6% +32.6% 5402 ? 13% perf-stat.i.cycles-between-cache-misses
6.456e+08 +1.3% 6.541e+08 perf-stat.i.dTLB-stores
0.48 ? 9% +11.1% 0.53 ? 8% perf-stat.i.ipc
2903 +1.1% 2936 perf-stat.i.minor-faults
2903 +1.1% 2936 perf-stat.i.page-faults
3677 +25.2% 4604 ? 9% perf-stat.overall.cycles-between-cache-misses
1766768 ? 9% -26.4% 1301105 ? 23% perf-stat.ps.cache-misses
6.438e+08 +1.3% 6.524e+08 perf-stat.ps.dTLB-stores
2896 +1.1% 2929 perf-stat.ps.minor-faults
2897 +1.1% 2929 perf-stat.ps.page-faults
0.55 ? 38% +0.4 0.96 ? 13% perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.entry_SYSCALL_64_after_hwframe
1.13 ? 12% +0.6 1.74 ? 14% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.entry_SYSCALL_64_after_hwframe
0.14 ? 13% +0.0 0.18 ? 8% perf-profile.children.cycles-pp.call_cpuidle
0.12 ? 22% +0.1 0.18 ? 24% perf-profile.children.cycles-pp.do_user_addr_fault
0.14 ? 19% +0.1 0.20 ? 18% perf-profile.children.cycles-pp.asm_exc_page_fault
0.13 ? 22% +0.1 0.18 ? 20% perf-profile.children.cycles-pp.exc_page_fault
0.11 ? 26% +0.1 0.16 ? 22% perf-profile.children.cycles-pp.__handle_mm_fault
0.11 ? 23% +0.1 0.17 ? 22% perf-profile.children.cycles-pp.handle_mm_fault
0.77 ? 11% +0.5 1.24 ? 13% perf-profile.children.cycles-pp.exit_to_user_mode_prepare
0.00 +0.6 0.60 ? 20% perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare
1.70 ? 8% +0.8 2.51 ? 15% perf-profile.children.cycles-pp.syscall_exit_to_user_mode
0.14 ? 15% +0.0 0.18 ? 8% perf-profile.self.cycles-pp.call_cpuidle
0.06 ? 59% +0.0 0.10 ? 27% perf-profile.self.cycles-pp.trigger_load_balance
0.63 ? 12% +0.5 1.12 ? 15% perf-profile.self.cycles-pp.exit_to_user_mode_prepare
0.00 +0.5 0.53 ? 17% perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare
1635 ? 4% +71.0% 2795 ? 5% proc-vmstat.nr_active_anon
65.89 ?108% +183.8% 187.00 ? 74% proc-vmstat.nr_active_file
58918 +5.3% 62014 proc-vmstat.nr_anon_pages
239429 +3.9% 248685 proc-vmstat.nr_file_pages
61168 +5.0% 64234 proc-vmstat.nr_inactive_anon
8304 +2.2% 8483 proc-vmstat.nr_mapped
4045 +28.7% 5208 ? 3% proc-vmstat.nr_shmem
235310 +3.4% 243282 proc-vmstat.nr_unevictable
1635 ? 4% +71.0% 2795 ? 5% proc-vmstat.nr_zone_active_anon
65.89 ?108% +183.8% 187.00 ? 74% proc-vmstat.nr_zone_active_file
61168 +5.0% 64234 proc-vmstat.nr_zone_inactive_anon
235310 +3.4% 243282 proc-vmstat.nr_zone_unevictable
937318 +14.1% 1069639 proc-vmstat.numa_hit
906134 +8.1% 979942 proc-vmstat.numa_local
31184 +187.6% 89696 ? 5% proc-vmstat.numa_other
999.44 ? 15% +282.9% 3827 ? 3% proc-vmstat.pgactivate
996499 +13.8% 1134186 proc-vmstat.pgalloc_normal
1174226 +1.3% 1189681 proc-vmstat.pgfault
1031237 +12.2% 1157491 proc-vmstat.pgfree
625.89 ?222% -87.5% 78.25 ? 30% interrupts.CPU11.NMI:Non-maskable_interrupts
625.89 ?222% -87.5% 78.25 ? 30% interrupts.CPU11.PMI:Performance_monitoring_interrupts
24.89 ?102% +152.1% 62.75 ? 61% interrupts.CPU12.RES:Rescheduling_interrupts
337.44 ? 44% +195.5% 997.00 ? 82% interrupts.CPU36.CAL:Function_call_interrupts
363.89 ? 33% +67.4% 609.25 ? 19% interrupts.CPU38.CAL:Function_call_interrupts
408.78 ? 56% +5575.0% 23198 ?166% interrupts.CPU4.CAL:Function_call_interrupts
347.89 ? 47% +106.7% 719.25 ? 44% interrupts.CPU42.CAL:Function_call_interrupts
347.89 ? 47% +56.9% 545.75 ? 3% interrupts.CPU45.CAL:Function_call_interrupts
652.78 ?216% -87.8% 79.75 ? 42% interrupts.CPU53.NMI:Non-maskable_interrupts
652.78 ?216% -87.8% 79.75 ? 42% interrupts.CPU53.PMI:Performance_monitoring_interrupts
288.11 ?145% -67.2% 94.50 ? 27% interrupts.CPU54.NMI:Non-maskable_interrupts
288.11 ?145% -67.2% 94.50 ? 27% interrupts.CPU54.PMI:Performance_monitoring_interrupts
128.44 ? 18% -49.4% 65.00 ? 33% interrupts.CPU55.NMI:Non-maskable_interrupts
128.44 ? 18% -49.4% 65.00 ? 33% interrupts.CPU55.PMI:Performance_monitoring_interrupts
142.33 ? 14% -36.2% 90.75 ? 27% interrupts.CPU56.NMI:Non-maskable_interrupts
142.33 ? 14% -36.2% 90.75 ? 27% interrupts.CPU56.PMI:Performance_monitoring_interrupts
141.78 ? 15% -44.8% 78.25 ? 23% interrupts.CPU58.NMI:Non-maskable_interrupts
141.78 ? 15% -44.8% 78.25 ? 23% interrupts.CPU58.PMI:Performance_monitoring_interrupts
351.22 ? 47% +221.2% 1128 ? 85% interrupts.CPU81.CAL:Function_call_interrupts
363.22 ? 47% +500.0% 2179 ? 76% interrupts.CPU90.CAL:Function_call_interrupts
3295 ? 9% -35.4% 2128 ? 10% interrupts.RES:Rescheduling_interrupts
1796 ? 23% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.avg
35813 ? 23% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.max
11.90 ? 29% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.min
6114 ? 21% -100.0% 0.00 sched_debug.cfs_rq:/.exec_clock.stddev
0.04 ? 7% +11.2% 0.05 ? 9% sched_debug.cpu.nr_running.avg
2585 ? 16% -100.0% 0.00 sched_debug.cpu.sched_count.avg
56814 ? 16% -100.0% 0.00 sched_debug.cpu.sched_count.max
602.46 ? 8% -100.0% 0.00 sched_debug.cpu.sched_count.min
6511 ? 23% -100.0% 0.00 sched_debug.cpu.sched_count.stddev
1213 ? 16% -100.0% 0.00 sched_debug.cpu.sched_goidle.avg
28314 ? 16% -100.0% 0.00 sched_debug.cpu.sched_goidle.max
216.95 ? 8% -100.0% 0.00 sched_debug.cpu.sched_goidle.min
3257 ? 23% -100.0% 0.00 sched_debug.cpu.sched_goidle.stddev
1155 ? 17% -100.0% 0.00 sched_debug.cpu.ttwu_count.avg
26529 ? 21% -100.0% 0.00 sched_debug.cpu.ttwu_count.max
212.36 ? 8% -100.0% 0.00 sched_debug.cpu.ttwu_count.min
3187 ? 26% -100.0% 0.00 sched_debug.cpu.ttwu_count.stddev
535.27 ? 28% -100.0% 0.00 sched_debug.cpu.ttwu_local.avg
11898 ? 98% -100.0% 0.00 sched_debug.cpu.ttwu_local.max
210.09 ? 8% -100.0% 0.00 sched_debug.cpu.ttwu_local.min
1252 ? 91% -100.0% 0.00 sched_debug.cpu.ttwu_local.stddev
Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
Thanks,
Oliver Sang