2018-09-06 19:20:34

by Olof Johansson

[permalink] [raw]
Subject: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

Today these are all global shared variables per protocol, and in
particular tcp_memory_allocated can get hot on a system with a
large number of CPUs and a substantial number of connections.

Moving it over to a per-cpu variable makes it significantly cheaper,
and the added overhead when summing up the percpu copies is still smaller
than the cost of having a hot cacheline bouncing around.

Signed-off-by: Olof Johansson <[email protected]>
---
crypto/af_alg.c | 10 ++++++++--
include/net/sctp/sctp.h | 3 ++-
include/net/sock.h | 12 ++++++------
include/net/tcp.h | 2 +-
include/net/udp.h | 2 +-
net/core/sock.c | 5 ++++-
net/decnet/af_decnet.c | 3 ++-
net/ipv4/tcp.c | 3 ++-
net/ipv4/udp.c | 4 +++-
net/sctp/protocol.c | 6 ++++++
net/sctp/socket.c | 2 +-
11 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index b053179e0bc5..1fd75a709d7b 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -29,7 +29,7 @@ struct alg_type_list {
struct list_head list;
};

-static atomic_long_t alg_memory_allocated;
+static struct percpu_counter alg_memory_allocated;

static struct proto alg_proto = {
.name = "ALG",
@@ -1183,13 +1183,19 @@ static int __init af_alg_init(void)
if (err)
goto out;

- err = sock_register(&alg_family);
+ err = percpu_counter_init(&alg_memory_allocated, 0, GFP_KERNEL);
if (err != 0)
goto out_unregister_proto;

+ err = sock_register(&alg_family);
+ if (err != 0)
+ goto out_free_percpu;
+
out:
return err;

+out_free_percpu:
+ percpu_counter_destroy(&alg_memory_allocated);
out_unregister_proto:
proto_unregister(&alg_proto);
goto out;
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 8c2caa370e0f..270579cf310b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -36,7 +36,7 @@
* Sridhar Samudrala <[email protected]>
* Ardelle Fan <[email protected]>
* Ryan Layer <[email protected]>
- * Kevin Gao <[email protected]>
+ * Kevin Gao <[email protected]>
*/

#ifndef __net_sctp_h__
@@ -114,6 +114,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock,
void sctp_sock_rfree(struct sk_buff *skb);
void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
+extern struct percpu_counter sctp_memory_allocated;
extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
diff --git a/include/net/sock.h b/include/net/sock.h
index 433f45fc2d68..45aed5e84b5d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1114,7 +1114,7 @@ struct proto {
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
- atomic_long_t *memory_allocated; /* Current allocated memory. */
+ struct percpu_counter *memory_allocated; /* Current allocated memory. */
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
/*
* Pressure flag: try to collapse.
@@ -1237,19 +1237,19 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
static inline long
sk_memory_allocated(const struct sock *sk)
{
- return atomic_long_read(sk->sk_prot->memory_allocated);
+ return percpu_counter_sum_positive(sk->sk_prot->memory_allocated);
}

-static inline long
+static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
{
- return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+ percpu_counter_add(sk->sk_prot->memory_allocated, amt);
}

static inline void
sk_memory_allocated_sub(struct sock *sk, int amt)
{
- atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+ percpu_counter_sub(sk->sk_prot->memory_allocated, amt);
}

static inline void sk_sockets_allocated_dec(struct sock *sk)
@@ -1277,7 +1277,7 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
static inline long
proto_memory_allocated(struct proto *prot)
{
- return atomic_long_read(prot->memory_allocated);
+ return percpu_counter_sum_positive(prot->memory_allocated);
}

static inline bool
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 770917d0caa7..2df1754cf3ab 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -248,7 +248,7 @@ extern long sysctl_tcp_mem[3];
#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */

-extern atomic_long_t tcp_memory_allocated;
+extern struct percpu_counter tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;

diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e0d9f7091a0 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -97,7 +97,7 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,

extern struct proto udp_prot;

-extern atomic_long_t udp_memory_allocated;
+extern struct percpu_counter udp_memory_allocated;

/* sysctl variables for udp */
extern long sysctl_udp_mem[3];
diff --git a/net/core/sock.c b/net/core/sock.c
index 3730eb855095..0a755f6c8942 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2394,9 +2394,12 @@ EXPORT_SYMBOL(sk_wait_data);
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
- long allocated = sk_memory_allocated_add(sk, amt);
+ long allocated;
bool charged = true;

+ sk_memory_allocated_add(sk, amt);
+ allocated = sk_memory_allocated(sk);
+
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
goto suppress_allocation;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7d6ff983ba2c..f88af9ae4474 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -156,7 +156,7 @@ static const struct proto_ops dn_proto_ops;
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
-static atomic_long_t decnet_memory_allocated;
+static struct percpu_counter decnet_memory_allocated;

static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -2356,6 +2356,7 @@ static int __init decnet_init(void)
int rc;

printk(banner);
+ percpu_counter_init(&decnet_memory_allocated, 0, GFP_KERNEL);

rc = proto_register(&dn_proto, 1);
if (rc != 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8c4235c098fd..eb6531ba6bd3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);

-atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
+struct percpu_counter tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);

#if IS_ENABLED(CONFIG_SMC)
@@ -3834,6 +3834,7 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));

+ percpu_counter_init(&tcp_memory_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
inet_hashinfo_init(&tcp_hashinfo);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..6ec5d2f68ae7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,7 +122,7 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);

-atomic_long_t udp_memory_allocated;
+struct percpu_counter udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);

#define MAX_UDP_PORTS 65536
@@ -2923,6 +2923,8 @@ void __init udp_init(void)

__udp_sysctl_init(&init_net);

+ percpu_counter_init(&udp_memory_allocated, 0, GFP_KERNEL);
+
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e948db29ab53..ca59ca0dc740 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1391,6 +1391,10 @@ static __init int sctp_init(void)
if (!sctp_chunk_cachep)
goto err_chunk_cachep;

+ status = percpu_counter_init(&sctp_memory_allocated, 0, GFP_KERNEL);
+ if (status)
+ goto err_percpu_memory_init;
+
status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL);
if (status)
goto err_percpu_counter_init;
@@ -1559,6 +1563,8 @@ static __init int sctp_init(void)
err_ehash_alloc:
percpu_counter_destroy(&sctp_sockets_allocated);
err_percpu_counter_init:
+ percpu_counter_destroy(&sctp_memory_allocated);
+err_percpu_memory_init:
kmem_cache_destroy(sctp_chunk_cachep);
err_chunk_cachep:
kmem_cache_destroy(sctp_bucket_cachep);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f73e9d38d5ba..60d55573baa5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
enum sctp_socket_type type);

static unsigned long sctp_memory_pressure;
-static atomic_long_t sctp_memory_allocated;
+struct percpu_counter sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;

static void sctp_enter_memory_pressure(struct sock *sk)
--
2.11.0


2018-09-06 19:33:58

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

On Thu, Sep 6, 2018 at 12:21 PM Olof Johansson <[email protected]> wrote:
>
> Today these are all global shared variables per protocol, and in
> particular tcp_memory_allocated can get hot on a system with
> large number of CPUs and a substantial number of connections.
>
> Moving it over to a per-cpu variable makes it significantly cheaper,
> and the added overhead when summing up the percpu copies is still smaller
> than the cost of having a hot cacheline bouncing around.

I am curious. We never noticed contention on this variable, at least for TCP.

Please share some numbers with us.

2018-09-07 03:32:57

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

On Thu, Sep 06, 2018 at 12:33:58PM -0700, Eric Dumazet wrote:
> On Thu, Sep 6, 2018 at 12:21 PM Olof Johansson <[email protected]> wrote:
> >
> > Today these are all global shared variables per protocol, and in
> > particular tcp_memory_allocated can get hot on a system with
> > large number of CPUs and a substantial number of connections.
> >
> > Moving it over to a per-cpu variable makes it significantly cheaper,
> > and the added overhead when summing up the percpu copies is still smaller
> > than the cost of having a hot cacheline bouncing around.
>
> I am curious. We never noticed contention on this variable, at least for TCP.

Yes these variables are heavily amortised so I'm surprised that
they would cause much contention.

> Please share some numbers with us.

Indeed.

Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2018-09-07 06:20:15

by Olof Johansson

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

Hi,

On Thu, Sep 6, 2018 at 8:32 PM, Herbert Xu <[email protected]> wrote:
> On Thu, Sep 06, 2018 at 12:33:58PM -0700, Eric Dumazet wrote:
>> On Thu, Sep 6, 2018 at 12:21 PM Olof Johansson <[email protected]> wrote:
>> >
>> > Today these are all global shared variables per protocol, and in
>> > particular tcp_memory_allocated can get hot on a system with
>> > large number of CPUs and a substantial number of connections.
>> >
>> > Moving it over to a per-cpu variable makes it significantly cheaper,
>> > and the added overhead when summing up the percpu copies is still smaller
>> > than the cost of having a hot cacheline bouncing around.
>>
>> I am curious. We never noticed contention on this variable, at least for TCP.
>
> Yes these variables are heavily amortised so I'm surprised that
> they would cause much contention.
>
>> Please share some numbers with us.
>
> Indeed.

Certainly, just had to collect them again.

This is on a dual xeon box, with ~150-200k TCP connections. I see
about .7% CPU spent in __sk_mem_{reduce,raise}_allocated in the
inlined atomic ops, most of those in reduce.

Call path for reduce is practically all from tcp_write_timer on softirq:

__sk_mem_reduce_allocated
tcp_write_timer
call_timer_fn
run_timer_softirq
__do_softirq
irq_exit
smp_apic_timer_interrupt
apic_timer_interrupt
cpuidle_enter_state

With this patch, I see about .18+.11+.07 = .36% in percpu-related
functions called from the same __sk_mem functions.

Now, that's a halving of cycle samples on that specific setup. The
real difference, though, is on another platform where atomics are more
expensive. There, this makes a significant difference. Unfortunately,
I can't share specifics but I think this change stands on its own on
the dual xeon setup as well, maybe with slightly less strong wording
on just how hot the variable/line happens to be.


-Olof

2018-09-07 07:03:13

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

On Thu, Sep 6, 2018 at 11:20 PM Olof Johansson <[email protected]> wrote:
>
> Hi,
>
> On Thu, Sep 6, 2018 at 8:32 PM, Herbert Xu <[email protected]> wrote:
> > On Thu, Sep 06, 2018 at 12:33:58PM -0700, Eric Dumazet wrote:
> >> On Thu, Sep 6, 2018 at 12:21 PM Olof Johansson <[email protected]> wrote:
> >> >
> >> > Today these are all global shared variables per protocol, and in
> >> > particular tcp_memory_allocated can get hot on a system with
> >> > large number of CPUs and a substantial number of connections.
> >> >
> >> > Moving it over to a per-cpu variable makes it significantly cheaper,
> >> > and the added overhead when summing up the percpu copies is still smaller
> >> > than the cost of having a hot cacheline bouncing around.
> >>
> >> I am curious. We never noticed contention on this variable, at least for TCP.
> >
> > Yes these variables are heavily amortised so I'm surprised that
> > they would cause much contention.
> >
> >> Please share some numbers with us.
> >
> > Indeed.
>
> Certainly, just had to collect them again.
>
> This is on a dual xeon box, with ~150-200k TCP connections. I see
> about .7% CPU spent in __sk_mem_{reduce,raise}_allocated in the
> inlined atomic ops, most of those in reduce.
>
> Call path for reduce is practically all from tcp_write_timer on softirq:
>
> __sk_mem_reduce_allocated
> tcp_write_timer
> call_timer_fn
> run_timer_softirq
> __do_softirq
> irq_exit
> smp_apic_timer_interrupt
> apic_timer_interrupt
> cpuidle_enter_state
>
> With this patch, I see about .18+.11+.07 = .36% in percpu-related
> functions called from the same __sk_mem functions.
>
> Now, that's a halving of cycles samples on that specific setup. The
> real difference though, is on another platform where atomics are more
> expensive. There, this makes a significant difference. Unfortunately,
> I can't share specifics but I think this change stands on its own on
> the dual xeon setup as well, maybe with slightly less strong wording
> on just how hot the variable/line happens to be.


Problem is : we have platforms with more than 100 cpus, and
sk_memory_allocated() cost will be too expensive,
especially if the host is under memory pressure, since all cpus will
touch their private counter.

per cpu variables do not really scale, they were ok 10 years ago when
no more than 16 cpus were the norm.

I would prefer to change TCP to not aggressively call
__sk_mem_reduce_allocated() from tcp_write_timer()

Ideally only tcp_retransmit_timer() should attempt to reduce forward
allocations, after recurring timeout.

Note that after 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d ("net: avoid
sk_forward_alloc overflows")
we have better control over sockets having huge forward allocations.

Something like :

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7fdf222a0bdfe9775970082f6b5dcdcc82b2ae1a..7e2e17cde9b6a9be835edfac26b64f4ce9411538
100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -505,6 +505,8 @@ void tcp_retransmit_timer(struct sock *sk)
mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
__NET_INC_STATS(sock_net(sk), mib_idx);
+ } else {
+ sk_mem_reclaim(sk);
}

tcp_enter_loss(sk);
@@ -576,11 +578,11 @@ void tcp_write_timer_handler(struct sock *sk)

if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
!icsk->icsk_pending)
- goto out;
+ return;

if (time_after(icsk->icsk_timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
icsk->icsk_timeout);
- goto out;
+ return;
}

tcp_mstamp_refresh(tcp_sk(sk));
@@ -602,9 +604,6 @@ void tcp_write_timer_handler(struct sock *sk)
tcp_probe_timer(sk);
break;
}
-
-out:
- sk_mem_reclaim(sk);
}

2018-09-07 07:21:46

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

On Fri, Sep 7, 2018 at 12:03 AM Eric Dumazet <[email protected]> wrote:

> Problem is : we have platforms with more than 100 cpus, and
> sk_memory_allocated() cost will be too expensive,
> especially if the host is under memory pressure, since all cpus will
> touch their private counter.
>
> per cpu variables do not really scale, they were ok 10 years ago when
> no more than 16 cpus were the norm.
>
> I would prefer change TCP to not aggressively call
> __sk_mem_reduce_allocated() from tcp_write_timer()
>
> Ideally only tcp_retransmit_timer() should attempt to reduce forward
> allocations, after recurring timeout.
>
> Note that after 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d ("net: avoid
> sk_forward_alloc overflows")
> we have better control over sockets having huge forward allocations.
>
> Something like :

Or something less risky :

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7fdf222a0bdfe9775970082f6b5dcdcc82b2ae1a..0aee80b6966cb2898e46350c761f9eb431ff1206
100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -604,7 +604,8 @@ void tcp_write_timer_handler(struct sock *sk)
}

out:
- sk_mem_reclaim(sk);
+ if (tcp_under_memory_pressure(sk))
+ sk_mem_reclaim(sk);
}

static void tcp_write_timer(struct timer_list *t)

2018-09-08 17:02:42

by Olof Johansson

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

Hi,

On Fri, Sep 7, 2018 at 12:21 AM, Eric Dumazet <[email protected]> wrote:
> On Fri, Sep 7, 2018 at 12:03 AM Eric Dumazet <[email protected]> wrote:
>
>> Problem is : we have platforms with more than 100 cpus, and
>> sk_memory_allocated() cost will be too expensive,
>> especially if the host is under memory pressure, since all cpus will
>> touch their private counter.
>>
>> per cpu variables do not really scale, they were ok 10 years ago when
>> no more than 16 cpus were the norm.
>>
>> I would prefer change TCP to not aggressively call
>> __sk_mem_reduce_allocated() from tcp_write_timer()
>>
>> Ideally only tcp_retransmit_timer() should attempt to reduce forward
>> allocations, after recurring timeout.
>>
>> Note that after 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d ("net: avoid
>> sk_forward_alloc overflows")
>> we have better control over sockets having huge forward allocations.
>>
>> Something like :
>
> Or something less risky :

I gave both of these patches a run, and neither does as well on the
system that has slower atomics. :(

The percpu version:

8.05% workload [kernel.vmlinux]
[k] __do_softirq
7.04% swapper [kernel.vmlinux]
[k] cpuidle_enter_state
5.54% workload [kernel.vmlinux]
[k] _raw_spin_unlock_irqrestore
1.66% swapper [kernel.vmlinux]
[k] __do_softirq
1.55% workload [kernel.vmlinux]
[k] finish_task_switch
1.24% swapper [kernel.vmlinux]
[k] finish_task_switch
1.07% workload [kernel.vmlinux]
[k] net_rx_action

The first patch from you still has a significant amount of time spent in
the atomics paths (non-inlined versions used):

7.87% workload [kernel.vmlinux]
[k] __ll_sc_atomic64_sub
7.48% workload [kernel.vmlinux]
[k] __do_softirq
5.05% workload [kernel.vmlinux]
[k] _raw_spin_unlock_irqrestore
2.42% workload [kernel.vmlinux]
[k] __ll_sc_atomic64_add_return
1.49% swapper [kernel.vmlinux]
[k] cpuidle_enter_state
1.31% workload [kernel.vmlinux]
[k] finish_task_switch
1.09% workload [kernel.vmlinux]
[k] tcp_sendmsg_locked
1.08% workload [kernel.vmlinux]
[k] __arch_copy_from_user
1.02% workload [kernel.vmlinux]
[k] net_rx_action

I think a lot of the overhead from the percpu approach can be alleviated
if we can use percpu_counter_read() instead of _sum() (i.e. no need to
iterate through the local per-cpu recent delta). I don't know the TCP
stack well enough to tell where it's OK to use a bit of slack in the
numbers though -- by default count will at most be off by 32*online
cpus. Might not be a significant number in reality.


-Olof

2018-09-09 18:38:37

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH] net/sock: move memory_allocated over to percpu_counter variables

On Sat, Sep 8, 2018 at 10:02 AM Olof Johansson <[email protected]> wrote:
>
> Hi,
>
> On Fri, Sep 7, 2018 at 12:21 AM, Eric Dumazet <[email protected]> wrote:
> > On Fri, Sep 7, 2018 at 12:03 AM Eric Dumazet <[email protected]> wrote:
> >
> >> Problem is : we have platforms with more than 100 cpus, and
> >> sk_memory_allocated() cost will be too expensive,
> >> especially if the host is under memory pressure, since all cpus will
> >> touch their private counter.
> >>
> >> per cpu variables do not really scale, they were ok 10 years ago when
> >> no more than 16 cpus were the norm.
> >>
> >> I would prefer change TCP to not aggressively call
> >> __sk_mem_reduce_allocated() from tcp_write_timer()
> >>
> >> Ideally only tcp_retransmit_timer() should attempt to reduce forward
> >> allocations, after recurring timeout.
> >>
> >> Note that after 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d ("net: avoid
> >> sk_forward_alloc overflows")
> >> we have better control over sockets having huge forward allocations.
> >>
> >> Something like :
> >
> > Or something less risky :
>
> I gave both of these patches a run, and neither do as well on the
> system that has slower atomics. :(
>
> The percpu version:
>
> 8.05% workload [kernel.vmlinux]
> [k] __do_softirq
> 7.04% swapper [kernel.vmlinux]
> [k] cpuidle_enter_state
> 5.54% workload [kernel.vmlinux]
> [k] _raw_spin_unlock_irqrestore
> 1.66% swapper [kernel.vmlinux]
> [k] __do_softirq
> 1.55% workload [kernel.vmlinux]
> [k] finish_task_switch
> 1.24% swapper [kernel.vmlinux]
> [k] finish_task_switch
> 1.07% workload [kernel.vmlinux]
> [k] net_rx_action
>
> The first patch from you still has significant amount of time spent in
> the atomics paths (non-inlined versions used):
>
> 7.87% workload [kernel.vmlinux]
> [k] __ll_sc_atomic64_sub


The second patch I gave should not enter this path at all, please try it.

> 7.48% workload [kernel.vmlinux]
> [k] __do_softirq
> 5.05% workload [kernel.vmlinux]
> [k] _raw_spin_unlock_irqrestore
> 2.42% workload [kernel.vmlinux]
> [k] __ll_sc_atomic64_add_return
> 1.49% swapper [kernel.vmlinux]
> [k] cpuidle_enter_state
> 1.31% workload [kernel.vmlinux]
> [k] finish_task_switch
> 1.09% workload [kernel.vmlinux]
> [k] tcp_sendmsg_locked
> 1.08% workload [kernel.vmlinux]
> [k] __arch_copy_from_user
> 1.02% workload [kernel.vmlinux]
> [k] net_rx_action
>
> I think a lot of the overhead from percpu approach can be alleviated
> if we can use percpu_counter_read() instead of _sum() (i.e. no need to
> iterate through the local per-cpu recent delta). I don't know the TCP
> stack well enough to tell where it's OK to use a bit of slack in the
> numbers though -- by default count will at most be off by 32*online
> cpus. Might not be a significant number in reality.

2018-09-18 09:37:22

by Chen, Rong A

[permalink] [raw]
Subject: [LKP] [net/sock] b99259a614: netperf.Throughput_Mbps -6.6% regression

Greetings,

FYI, we noticed a -6.6% regression of netperf.Throughput_Mbps due to commit:


commit: b99259a61450bb6403bbbbf279f8754a441eae8b ("[PATCH] net/sock: move memory_allocated over to percpu_counter variables")
url: https://github.com/0day-ci/linux/commits/Olof-Johansson/net-sock-move-memory_allocated-over-to-percpu_counter-variables/20180907-095536


in testcase: netperf
on test machine: 8 threads Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz with 8G memory
with following parameters:

ip: ipv4
runtime: 300s
nr_threads: 200%
cluster: cs-localhost
send_size: 5K
test: TCP_SENDFILE
ucode: 0x25
cpufreq_governor: performance

test-description: Netperf is a benchmark that can be used to measure various aspects of networking performance.
test-url: http://www.netperf.org/netperf/

In addition to that, the commit also has significant impact on the following tests:

+------------------+-----------------------------------------------------------------------+
| testcase: change | netperf: netperf.Throughput_total_tps -49.3% improvement |
| test machine | 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 128G memory |
| test parameters | cluster=cs-localhost |
| | cpufreq_governor=performance |
| | ip=ipv4 |
| | nr_threads=50% |
| | runtime=300s |
| | test=UDP_RR |
+------------------+-----------------------------------------------------------------------+
| testcase: change | netperf: netperf.Throughput_total_tps -13.5% improvement |
| test machine | 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 128G memory |
| test parameters | cluster=cs-localhost |
| | cpufreq_governor=performance |
| | ip=ipv4 |
| | nr_threads=25% |
| | runtime=300s |
| | test=UDP_RR |
+------------------+-----------------------------------------------------------------------+


Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml

=========================================================================================
cluster/compiler/cpufreq_governor/ip/kconfig/nr_threads/rootfs/runtime/send_size/tbox_group/test/testcase/ucode:
cs-localhost/gcc-7/performance/ipv4/x86_64-rhel-7.2/200%/debian-x86_64-2018-04-03.cgz/300s/5K/lkp-hsw-d01/TCP_SENDFILE/netperf/0x25

commit:
6da410d97f (" mlx5e-fixes-2018-09-05")
b99259a614 ("net/sock: move memory_allocated over to percpu_counter variables")

6da410d97ffa486e b99259a61450bb6403bbbbf279
---------------- --------------------------
fail:runs %reproduction fail:runs
| | |
1:4 -25% :4 dmesg.RIP:drm_wait_one_vblank[drm]
1:4 -25% :4 dmesg.WARNING:at_drivers/gpu/drm/drm_vblank.c:#drm_wait_one_vblank[drm]
:4 25% 1:4 dmesg.WARNING:at_ip_fsnotify/0x
1:4 -25% :4 kmsg.drm:drm_atomic_helper_wait_for_dependencies[drm_kms_helper]]*ERROR*[CONNECTOR:#:VGA-#]flip_done_timed_out
1:4 -25% :4 kmsg.drm:drm_atomic_helper_wait_for_dependencies[drm_kms_helper]]*ERROR*[CRTC:#:pipe_A]flip_done_timed_out
1:4 -25% :4 kmsg.drm:drm_atomic_helper_wait_for_dependencies[drm_kms_helper]]*ERROR*[PLANE:#:primary_A]flip_done_timed_out
1:4 -25% :4 kmsg.drm:drm_atomic_helper_wait_for_flip_done[drm_kms_helper]]*ERROR*[CRTC:#:pipe_A]flip_done_timed_out
0:4 0% 0:4 perf-profile.children.cycles-pp.schedule_timeout
%stddev %change %stddev
\ | \
7990 -6.6% 7460 netperf.Throughput_Mbps
127841 -6.6% 119362 netperf.Throughput_total_Mbps
1874683 -2.1% 1835777 netperf.time.involuntary_context_switches
643.25 +1.3% 651.50 netperf.time.percent_of_cpu_this_job_got
1792 +2.1% 1830 netperf.time.system_time
144.93 -8.1% 133.19 netperf.time.user_time
1099277 -7.7% 1014302 netperf.time.voluntary_context_switches
9.363e+08 -6.6% 8.742e+08 netperf.workload
30722 -4.2% 29428 vmstat.system.cs
25888 ± 31% +77.6% 45981 ± 25% sched_debug.cfs_rq:/.min_vruntime.stddev
25888 ± 31% +77.6% 45981 ± 25% sched_debug.cfs_rq:/.spread0.stddev
7151 ± 4% -20.0% 5720 ± 5% slabinfo.anon_vma_chain.active_objs
7409 ± 3% -21.7% 5804 ± 6% slabinfo.anon_vma_chain.num_objs
38618 ± 10% -31.7% 26365 ± 22% turbostat.C1E
0.13 ± 15% -0.1 0.08 ± 44% turbostat.C1E%
161917 ± 4% -24.6% 122121 ± 15% cpuidle.C1.time
3215895 ± 15% -38.8% 1966692 ± 43% cpuidle.C1E.time
38639 ± 10% -31.8% 26368 ± 22% cpuidle.C1E.usage
1.305e+12 -1.4% 1.287e+12 perf-stat.branch-instructions
3.00 -0.0 2.97 perf-stat.branch-miss-rate%
3.92e+10 -2.5% 3.822e+10 perf-stat.branch-misses
0.69 ± 9% -0.2 0.51 ± 14% perf-stat.cache-miss-rate%
2.1e+10 +17.9% 2.477e+10 perf-stat.cache-references
9364894 -4.2% 8976073 perf-stat.context-switches
1.31 +2.0% 1.33 perf-stat.cpi
0.09 -0.0 0.08 perf-stat.dTLB-load-miss-rate%
2.02e+09 -15.8% 1.701e+09 perf-stat.dTLB-load-misses
2.201e+12 -3.0% 2.135e+12 perf-stat.dTLB-loads
1.38e+12 -4.1% 1.325e+12 perf-stat.dTLB-stores
4.827e+08 ± 8% -19.0% 3.908e+08 ± 9% perf-stat.iTLB-loads
6.749e+12 -1.9% 6.62e+12 perf-stat.instructions
0.76 -1.9% 0.75 perf-stat.ipc
7207 +5.1% 7572 perf-stat.path-length
18.44 -1.4 17.04 perf-profile.calltrace.cycles-pp.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
18.46 -1.4 17.06 perf-profile.calltrace.cycles-pp.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
18.25 -1.4 16.86 perf-profile.calltrace.cycles-pp.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
18.17 -1.4 16.80 perf-profile.calltrace.cycles-pp.tcp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64
13.45 -1.2 12.24 perf-profile.calltrace.cycles-pp.skb_copy_datagram_iter.tcp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
12.68 -1.1 11.57 perf-profile.calltrace.cycles-pp.copy_page_to_iter.skb_copy_datagram_iter.tcp_recvmsg.inet_recvmsg.__sys_recvfrom
11.95 -0.9 11.00 ± 2% perf-profile.calltrace.cycles-pp.generic_file_splice_read.splice_direct_to_actor.do_splice_direct.do_sendfile.__x64_sys_sendfile64
9.53 ± 2% -0.9 8.68 perf-profile.calltrace.cycles-pp.copyout.copy_page_to_iter.skb_copy_datagram_iter.tcp_recvmsg.inet_recvmsg
9.35 ± 2% -0.8 8.54 perf-profile.calltrace.cycles-pp.copy_user_enhanced_fast_string.copyout.copy_page_to_iter.skb_copy_datagram_iter.tcp_recvmsg
9.50 -0.8 8.73 ± 2% perf-profile.calltrace.cycles-pp.generic_file_read_iter.generic_file_splice_read.splice_direct_to_actor.do_splice_direct.do_sendfile
10.73 ± 2% -0.5 10.21 perf-profile.calltrace.cycles-pp.__tcp_push_pending_frames.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage.inet_sendpage
10.56 ± 2% -0.4 10.11 perf-profile.calltrace.cycles-pp.tcp_write_xmit.__tcp_push_pending_frames.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage
2.99 ± 3% -0.4 2.59 ± 4% perf-profile.calltrace.cycles-pp.security_file_permission.do_splice_direct.do_sendfile.__x64_sys_sendfile64.do_syscall_64
4.94 ± 2% -0.3 4.60 ± 3% perf-profile.calltrace.cycles-pp.security_file_permission.do_sendfile.__x64_sys_sendfile64.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.70 ± 14% -0.3 0.43 ± 58% perf-profile.calltrace.cycles-pp.__inode_security_revalidate.selinux_file_permission.security_file_permission.do_splice_direct.do_sendfile
3.04 ± 2% -0.3 2.77 ± 4% perf-profile.calltrace.cycles-pp.do_splice_to.splice_direct_to_actor.do_splice_direct.do_sendfile.__x64_sys_sendfile64
2.62 ± 3% -0.2 2.39 ± 2% perf-profile.calltrace.cycles-pp.pagecache_get_page.generic_file_read_iter.generic_file_splice_read.splice_direct_to_actor.do_splice_direct
2.31 ± 2% -0.2 2.09 ± 2% perf-profile.calltrace.cycles-pp.find_get_entry.pagecache_get_page.generic_file_read_iter.generic_file_splice_read.splice_direct_to_actor
1.52 ± 5% -0.2 1.31 ± 8% perf-profile.calltrace.cycles-pp.selinux_file_permission.security_file_permission.do_splice_direct.do_sendfile.__x64_sys_sendfile64
1.14 ± 4% -0.2 0.93 ± 10% perf-profile.calltrace.cycles-pp.__fget_light.do_sendfile.__x64_sys_sendfile64.do_syscall_64.entry_SYSCALL_64_after_hwframe
1.88 ± 2% -0.2 1.68 perf-profile.calltrace.cycles-pp.lock_sock_nested.tcp_sendpage.inet_sendpage.kernel_sendpage.sock_sendpage
1.11 ± 16% -0.2 0.92 ± 3% perf-profile.calltrace.cycles-pp.__kfree_skb.tcp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
3.21 -0.2 3.02 ± 2% perf-profile.calltrace.cycles-pp.syscall_return_via_sysret
0.74 ± 23% -0.2 0.56 ± 6% perf-profile.calltrace.cycles-pp.skb_release_data.__kfree_skb.tcp_recvmsg.inet_recvmsg.__sys_recvfrom
1.19 ± 7% -0.2 1.02 ± 5% perf-profile.calltrace.cycles-pp.file_has_perm.security_file_permission.do_splice_direct.do_sendfile.__x64_sys_sendfile64
1.20 ± 3% -0.1 1.05 ± 5% perf-profile.calltrace.cycles-pp.__might_fault.copy_page_to_iter.skb_copy_datagram_iter.tcp_recvmsg.inet_recvmsg
1.19 ± 5% -0.1 1.06 ± 6% perf-profile.calltrace.cycles-pp.file_has_perm.security_file_permission.do_sendfile.__x64_sys_sendfile64.do_syscall_64
0.80 ± 4% -0.1 0.68 perf-profile.calltrace.cycles-pp.___might_sleep.__might_fault.copy_page_to_iter.skb_copy_datagram_iter.tcp_recvmsg
0.90 ± 6% -0.1 0.78 ± 6% perf-profile.calltrace.cycles-pp.atime_needs_update.touch_atime.splice_direct_to_actor.do_splice_direct.do_sendfile
0.71 ± 2% -0.1 0.61 ± 5% perf-profile.calltrace.cycles-pp.sched_clock_cpu.tcp_write_xmit.__tcp_push_pending_frames.do_tcp_sendpages.tcp_sendpage_locked
1.07 ± 3% -0.1 0.96 ± 3% perf-profile.calltrace.cycles-pp.radix_tree_lookup_slot.find_get_entry.pagecache_get_page.generic_file_read_iter.generic_file_splice_read
0.74 ± 6% -0.1 0.64 ± 5% perf-profile.calltrace.cycles-pp._raw_spin_lock_bh.release_sock.tcp_sendpage.inet_sendpage.kernel_sendpage
0.63 ± 3% -0.1 0.55 ± 6% perf-profile.calltrace.cycles-pp.native_sched_clock.sched_clock.sched_clock_cpu.tcp_write_xmit.__tcp_push_pending_frames
0.70 ± 3% -0.1 0.63 ± 4% perf-profile.calltrace.cycles-pp.__tcp_transmit_skb.tcp_write_xmit.__tcp_push_pending_frames.tcp_rcv_established.tcp_v4_do_rcv
0.78 ± 3% -0.1 0.71 ± 5% perf-profile.calltrace.cycles-pp.__radix_tree_lookup.radix_tree_lookup_slot.find_get_entry.pagecache_get_page.generic_file_read_iter
0.82 ± 3% -0.1 0.76 ± 3% perf-profile.calltrace.cycles-pp.__tcp_push_pending_frames.tcp_rcv_established.tcp_v4_do_rcv.tcp_v4_rcv.ip_local_deliver_finish
5.58 +0.1 5.71 perf-profile.calltrace.cycles-pp.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq.__local_bh_enable_ip
5.35 +0.1 5.49 perf-profile.calltrace.cycles-pp.process_backlog.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq
0.91 ± 4% +0.1 1.04 ± 3% perf-profile.calltrace.cycles-pp.sk_stream_alloc_skb.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage.inet_sendpage
90.58 +0.4 90.99 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +0.5 0.55 ± 6% perf-profile.calltrace.cycles-pp.__release_sock.release_sock.tcp_recvmsg.inet_recvmsg.__sys_recvfrom
0.00 +0.6 0.60 ± 6% perf-profile.calltrace.cycles-pp.release_sock.tcp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
0.00 +0.6 0.65 ± 18% perf-profile.calltrace.cycles-pp.tcp_data_queue.tcp_rcv_established.tcp_v4_do_rcv.tcp_v4_rcv.ip_local_deliver_finish
0.00 +0.7 0.68 ± 2% perf-profile.calltrace.cycles-pp.find_next_bit.cpumask_next.__percpu_counter_sum.__sk_mem_raise_allocated.__sk_mem_schedule
0.00 +0.8 0.78 ± 13% perf-profile.calltrace.cycles-pp.percpu_counter_add_batch.__sk_mem_raise_allocated.__sk_mem_schedule.do_tcp_sendpages.tcp_sendpage_locked
0.00 +0.8 0.84 perf-profile.calltrace.cycles-pp.cpumask_next.__percpu_counter_sum.__sk_mem_raise_allocated.__sk_mem_schedule.do_tcp_sendpages
0.00 +1.7 1.65 ± 4% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__sk_mem_schedule
70.71 +2.0 72.70 perf-profile.calltrace.cycles-pp.__x64_sys_sendfile64.do_syscall_64.entry_SYSCALL_64_after_hwframe
68.26 +2.2 70.47 perf-profile.calltrace.cycles-pp.do_sendfile.__x64_sys_sendfile64.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.00 +2.8 2.85 ± 5% perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__sk_mem_schedule.do_tcp_sendpages
57.51 +3.2 60.70 perf-profile.calltrace.cycles-pp.do_splice_direct.do_sendfile.__x64_sys_sendfile64.do_syscall_64.entry_SYSCALL_64_after_hwframe
54.10 +3.6 57.72 perf-profile.calltrace.cycles-pp.splice_direct_to_actor.do_splice_direct.do_sendfile.__x64_sys_sendfile64.do_syscall_64
36.84 +4.9 41.77 perf-profile.calltrace.cycles-pp.direct_splice_actor.splice_direct_to_actor.do_splice_direct.do_sendfile.__x64_sys_sendfile64
35.89 +5.0 40.88 perf-profile.calltrace.cycles-pp.splice_from_pipe.direct_splice_actor.splice_direct_to_actor.do_splice_direct.do_sendfile
35.00 +5.0 40.01 perf-profile.calltrace.cycles-pp.__splice_from_pipe.splice_from_pipe.direct_splice_actor.splice_direct_to_actor.do_splice_direct
30.95 +5.0 36.00 perf-profile.calltrace.cycles-pp.pipe_to_sendpage.__splice_from_pipe.splice_from_pipe.direct_splice_actor.splice_direct_to_actor
29.68 +5.2 34.87 perf-profile.calltrace.cycles-pp.sock_sendpage.pipe_to_sendpage.__splice_from_pipe.splice_from_pipe.direct_splice_actor
28.59 +5.3 33.84 perf-profile.calltrace.cycles-pp.inet_sendpage.kernel_sendpage.sock_sendpage.pipe_to_sendpage.__splice_from_pipe
28.87 +5.3 34.14 perf-profile.calltrace.cycles-pp.kernel_sendpage.sock_sendpage.pipe_to_sendpage.__splice_from_pipe.splice_from_pipe
27.07 +5.3 32.36 perf-profile.calltrace.cycles-pp.tcp_sendpage.inet_sendpage.kernel_sendpage.sock_sendpage.pipe_to_sendpage
21.40 +5.7 27.15 perf-profile.calltrace.cycles-pp.tcp_sendpage_locked.tcp_sendpage.inet_sendpage.kernel_sendpage.sock_sendpage
20.54 +5.8 26.38 perf-profile.calltrace.cycles-pp.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage.inet_sendpage.kernel_sendpage
0.00 +5.9 5.93 ± 4% perf-profile.calltrace.cycles-pp.__percpu_counter_sum.__sk_mem_raise_allocated.__sk_mem_schedule.do_tcp_sendpages.tcp_sendpage_locked
1.22 ± 4% +6.2 7.47 ± 3% perf-profile.calltrace.cycles-pp.__sk_mem_raise_allocated.__sk_mem_schedule.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage
1.32 ± 3% +6.3 7.60 ± 3% perf-profile.calltrace.cycles-pp.__sk_mem_schedule.do_tcp_sendpages.tcp_sendpage_locked.tcp_sendpage.inet_sendpage
18.44 -1.4 17.04 perf-profile.children.cycles-pp.__sys_recvfrom
18.46 -1.4 17.06 perf-profile.children.cycles-pp.__x64_sys_recvfrom
18.25 -1.4 16.86 perf-profile.children.cycles-pp.inet_recvmsg
18.19 -1.4 16.81 perf-profile.children.cycles-pp.tcp_recvmsg
15.06 -1.2 13.82 perf-profile.children.cycles-pp.copy_page_to_iter
13.47 -1.2 12.25 perf-profile.children.cycles-pp.skb_copy_datagram_iter
12.00 -1.0 11.05 ± 2% perf-profile.children.cycles-pp.generic_file_splice_read
9.58 ± 2% -0.9 8.72 perf-profile.children.cycles-pp.copyout
10.09 -0.9 9.23 ± 2% perf-profile.children.cycles-pp.security_file_permission
9.42 ± 2% -0.8 8.60 perf-profile.children.cycles-pp.copy_user_enhanced_fast_string
9.68 -0.8 8.90 ± 2% perf-profile.children.cycles-pp.generic_file_read_iter
11.60 ± 2% -0.6 11.03 perf-profile.children.cycles-pp.__tcp_push_pending_frames
11.47 ± 2% -0.5 10.94 perf-profile.children.cycles-pp.tcp_write_xmit
6.05 ± 2% -0.5 5.57 ± 2% perf-profile.children.cycles-pp.selinux_file_permission
3.54 ± 3% -0.3 3.24 ± 2% perf-profile.children.cycles-pp.___might_sleep
2.50 ± 3% -0.3 2.21 ± 5% perf-profile.children.cycles-pp.file_has_perm
3.05 ± 2% -0.3 2.78 ± 4% perf-profile.children.cycles-pp.do_splice_to
2.06 -0.3 1.81 ± 2% perf-profile.children.cycles-pp.fsnotify
2.70 ± 2% -0.2 2.45 ± 2% perf-profile.children.cycles-pp.pagecache_get_page
3.39 ± 3% -0.2 3.15 perf-profile.children.cycles-pp.touch_atime
2.10 ± 2% -0.2 1.87 ± 2% perf-profile.children.cycles-pp.lock_sock_nested
1.29 ± 13% -0.2 1.07 ± 2% perf-profile.children.cycles-pp.__kfree_skb
2.57 ± 4% -0.2 2.35 perf-profile.children.cycles-pp.atime_needs_update
2.37 -0.2 2.15 ± 3% perf-profile.children.cycles-pp.__might_fault
2.34 ± 2% -0.2 2.13 ± 2% perf-profile.children.cycles-pp.find_get_entry
1.21 ± 5% -0.2 1.00 ± 9% perf-profile.children.cycles-pp.__fget_light
3.67 -0.2 3.46 ± 3% perf-profile.children.cycles-pp.syscall_return_via_sysret
2.72 ± 4% -0.2 2.52 ± 2% perf-profile.children.cycles-pp.__inode_security_revalidate
1.61 ± 4% -0.2 1.41 ± 6% perf-profile.children.cycles-pp._raw_spin_lock_bh
0.86 ± 20% -0.2 0.67 ± 5% perf-profile.children.cycles-pp.skb_release_data
0.47 ± 18% -0.2 0.30 ± 28% perf-profile.children.cycles-pp.hrtimer_interrupt
0.99 ± 5% -0.2 0.83 ± 5% perf-profile.children.cycles-pp.__fsnotify_parent
0.41 ± 21% -0.2 0.25 ± 33% perf-profile.children.cycles-pp.__hrtimer_run_queues
0.52 ± 16% -0.2 0.36 ± 24% perf-profile.children.cycles-pp.smp_apic_timer_interrupt
0.56 ± 15% -0.2 0.40 ± 22% perf-profile.children.cycles-pp.apic_timer_interrupt
1.49 ± 2% -0.1 1.34 ± 4% perf-profile.children.cycles-pp.current_time
0.32 ± 25% -0.1 0.18 ± 48% perf-profile.children.cycles-pp.tick_sched_timer
1.77 ± 4% -0.1 1.65 ± 2% perf-profile.children.cycles-pp.__might_sleep
1.10 ± 3% -0.1 0.98 ± 2% perf-profile.children.cycles-pp.radix_tree_lookup_slot
0.70 ± 11% -0.1 0.58 ± 9% perf-profile.children.cycles-pp.nf_hook_slow
0.96 ± 2% -0.1 0.85 ± 4% perf-profile.children.cycles-pp.sched_clock_cpu
0.88 ± 2% -0.1 0.78 ± 3% perf-profile.children.cycles-pp.sched_clock
0.85 ± 2% -0.1 0.75 ± 3% perf-profile.children.cycles-pp.native_sched_clock
0.46 ± 4% -0.1 0.37 ± 8% perf-profile.children.cycles-pp.selinux_ip_postroute
0.37 ± 5% -0.1 0.30 ± 8% perf-profile.children.cycles-pp.selinux_ip_postroute_compat
0.44 ± 3% -0.1 0.38 ± 4% perf-profile.children.cycles-pp.tcp_rate_check_app_limited
0.12 ± 27% -0.1 0.06 ± 6% perf-profile.children.cycles-pp.__copy_skb_header
0.20 ± 16% -0.0 0.15 ± 12% perf-profile.children.cycles-pp.lock_timer_base
0.16 ± 13% -0.0 0.11 ± 18% perf-profile.children.cycles-pp.ip_rcv_core
0.21 ± 7% -0.0 0.17 ± 7% perf-profile.children.cycles-pp.enqueue_to_backlog
0.37 ± 3% -0.0 0.33 ± 6% perf-profile.children.cycles-pp.generic_splice_sendpage
0.25 ± 7% -0.0 0.21 ± 7% perf-profile.children.cycles-pp.iov_iter_pipe
0.06 ± 11% +0.0 0.08 ± 6% perf-profile.children.cycles-pp.__list_del_entry_valid
0.06 ± 13% +0.0 0.08 ± 5% perf-profile.children.cycles-pp.get_nohz_timer_target
0.09 ± 18% +0.0 0.12 ± 6% perf-profile.children.cycles-pp.sk_stream_wait_memory
0.09 ± 11% +0.0 0.12 ± 13% perf-profile.children.cycles-pp.tcp_rearm_rto
0.04 ± 60% +0.1 0.11 ± 14% perf-profile.children.cycles-pp.splice_from_pipe_next
0.91 ± 4% +0.1 1.04 ± 3% perf-profile.children.cycles-pp.sk_stream_alloc_skb
0.11 ± 7% +0.2 0.34 ± 9% perf-profile.children.cycles-pp.tcp_leave_memory_pressure
0.33 ± 10% +0.2 0.58 ± 4% perf-profile.children.cycles-pp.__release_sock
3.19 ± 2% +0.4 3.62 ± 4% perf-profile.children.cycles-pp.tcp_v4_do_rcv
0.55 ± 12% +0.4 0.99 ± 10% perf-profile.children.cycles-pp.tcp_data_queue
3.07 ± 2% +0.4 3.51 ± 5% perf-profile.children.cycles-pp.tcp_rcv_established
0.18 ± 27% +0.4 0.63 ± 15% perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
90.71 +0.5 91.20 perf-profile.children.cycles-pp.do_syscall_64
0.08 ± 13% +0.5 0.60 ± 12% perf-profile.children.cycles-pp.tcp_try_rmem_schedule
0.00 +0.8 0.77 ± 2% perf-profile.children.cycles-pp.find_next_bit
0.00 +0.9 0.94 ± 10% perf-profile.children.cycles-pp.percpu_counter_add_batch
0.00 +1.1 1.14 ± 3% perf-profile.children.cycles-pp.cpumask_next
0.00 +1.9 1.92 ± 6% perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
70.92 +2.0 72.87 perf-profile.children.cycles-pp.__x64_sys_sendfile64
68.42 +2.2 70.62 perf-profile.children.cycles-pp.do_sendfile
57.60 +3.2 60.79 perf-profile.children.cycles-pp.do_splice_direct
0.25 ± 13% +3.3 3.54 ± 5% perf-profile.children.cycles-pp._raw_spin_lock_irqsave
54.16 +3.6 57.77 perf-profile.children.cycles-pp.splice_direct_to_actor
36.88 +5.0 41.83 perf-profile.children.cycles-pp.direct_splice_actor
35.98 +5.0 40.97 perf-profile.children.cycles-pp.splice_from_pipe
35.13 +5.0 40.13 perf-profile.children.cycles-pp.__splice_from_pipe
31.00 +5.0 36.03 perf-profile.children.cycles-pp.pipe_to_sendpage
29.75 +5.2 34.97 perf-profile.children.cycles-pp.sock_sendpage
28.64 +5.3 33.89 perf-profile.children.cycles-pp.inet_sendpage
28.94 +5.3 34.22 perf-profile.children.cycles-pp.kernel_sendpage
27.21 +5.3 32.51 perf-profile.children.cycles-pp.tcp_sendpage
21.48 +5.7 27.23 perf-profile.children.cycles-pp.tcp_sendpage_locked
20.69 +5.8 26.53 perf-profile.children.cycles-pp.do_tcp_sendpages
0.00 +6.7 6.67 ± 4% perf-profile.children.cycles-pp.__percpu_counter_sum
1.34 ± 3% +7.0 8.32 ± 3% perf-profile.children.cycles-pp.__sk_mem_raise_allocated
1.46 ± 3% +7.0 8.46 ± 3% perf-profile.children.cycles-pp.__sk_mem_schedule
1.21 ± 3% -0.8 0.40 ± 9% perf-profile.self.cycles-pp.__sk_mem_raise_allocated
9.27 -0.8 8.50 perf-profile.self.cycles-pp.copy_user_enhanced_fast_string
3.41 ± 3% -0.3 3.08 perf-profile.self.cycles-pp.___might_sleep
3.12 ± 3% -0.3 2.86 perf-profile.self.cycles-pp.selinux_file_permission
2.01 ± 2% -0.2 1.76 ± 2% perf-profile.self.cycles-pp.fsnotify
2.52 -0.2 2.28 perf-profile.self.cycles-pp.generic_file_read_iter
2.42 -0.2 2.20 ± 2% perf-profile.self.cycles-pp.do_sendfile
3.66 -0.2 3.45 ± 3% perf-profile.self.cycles-pp.syscall_return_via_sysret
1.18 ± 5% -0.2 0.98 ± 9% perf-profile.self.cycles-pp.__fget_light
1.62 ± 3% -0.2 1.45 ± 5% perf-profile.self.cycles-pp.tcp_write_xmit
1.19 ± 2% -0.2 1.02 ± 5% perf-profile.self.cycles-pp.pipe_to_sendpage
1.44 ± 5% -0.2 1.28 ± 6% perf-profile.self.cycles-pp._raw_spin_lock_bh
0.89 ± 5% -0.1 0.74 ± 4% perf-profile.self.cycles-pp.__fsnotify_parent
1.12 ± 2% -0.1 0.98 ± 4% perf-profile.self.cycles-pp.do_syscall_64
1.02 ± 3% -0.1 0.90 ± 5% perf-profile.self.cycles-pp.__x64_sys_sendfile64
0.81 ± 7% -0.1 0.70 ± 6% perf-profile.self.cycles-pp.file_has_perm
0.71 ± 7% -0.1 0.61 ± 7% perf-profile.self.cycles-pp.skb_copy_datagram_iter
0.89 ± 6% -0.1 0.79 ± 6% perf-profile.self.cycles-pp.tcp_current_mss
0.14 ± 9% -0.1 0.04 ± 59% perf-profile.self.cycles-pp.sched_clock
0.75 ± 4% -0.1 0.66 ± 6% perf-profile.self.cycles-pp.skb_release_data
0.97 ± 3% -0.1 0.88 ± 6% perf-profile.self.cycles-pp.page_cache_pipe_buf_confirm
0.82 -0.1 0.74 ± 6% perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
0.29 ± 8% -0.1 0.21 ± 8% perf-profile.self.cycles-pp.radix_tree_lookup_slot
0.42 ± 5% -0.1 0.35 ± 4% perf-profile.self.cycles-pp.tcp_rate_check_app_limited
1.57 -0.1 1.51 ± 2% perf-profile.self.cycles-pp.__might_sleep
0.12 ± 27% -0.1 0.06 ± 6% perf-profile.self.cycles-pp.__copy_skb_header
0.15 ± 7% -0.1 0.10 ± 15% perf-profile.self.cycles-pp.selinux_ip_postroute_compat
0.15 ± 14% -0.0 0.11 ± 15% perf-profile.self.cycles-pp.ip_rcv_core
0.17 ± 9% -0.0 0.12 ± 8% perf-profile.self.cycles-pp.ip_output
0.32 -0.0 0.28 ± 4% perf-profile.self.cycles-pp.generic_splice_sendpage
0.14 ± 10% -0.0 0.10 ± 14% perf-profile.self.cycles-pp.enqueue_to_backlog
0.23 ± 9% -0.0 0.19 ± 2% perf-profile.self.cycles-pp.iov_iter_pipe
0.22 ± 9% -0.0 0.18 ± 7% perf-profile.self.cycles-pp.__ip_queue_xmit
0.11 ± 19% -0.0 0.08 ± 8% perf-profile.self.cycles-pp.sock_def_readable
0.11 ± 4% -0.0 0.08 ± 10% perf-profile.self.cycles-pp.bpf_fd_pass
0.11 ± 7% +0.0 0.15 ± 10% perf-profile.self.cycles-pp.tcp_ack
0.01 ±173% +0.1 0.10 ± 8% perf-profile.self.cycles-pp.splice_from_pipe_next
0.09 ± 7% +0.2 0.32 ± 9% perf-profile.self.cycles-pp.tcp_leave_memory_pressure
0.15 ± 16% +0.4 0.52 ± 5% perf-profile.self.cycles-pp._raw_spin_unlock_irqrestore
0.00 +0.4 0.38 ± 12% perf-profile.self.cycles-pp.cpumask_next
0.00 +0.7 0.67 ± 2% perf-profile.self.cycles-pp.find_next_bit
0.00 +0.7 0.72 ± 6% perf-profile.self.cycles-pp.percpu_counter_add_batch
0.24 ± 14% +1.4 1.63 ± 6% perf-profile.self.cycles-pp._raw_spin_lock_irqsave
0.00 +1.9 1.90 ± 6% perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath
0.00 +2.1 2.08 ± 3% perf-profile.self.cycles-pp.__percpu_counter_sum



netperf.Throughput_Mbps

8100 +-+------------------------------------------------------------------+
| ++.+ .++.+++.+ .++. |
8000 +-+++.++. + + +.++.++.++.++.++.++.+++.++.++ ++.++.++.+|
7900 +-+ ++ |
| |
7800 +-+ |
| |
7700 +-+ |
| |
7600 +-+ |
7500 +-+ O O |
| O O O OO OOO OO OO |
7400 +-+ O O |
OO OO O |
7300 +-+------------------------------------------------------------------+


netperf.Throughput_total_Mbps

130000 +-+----------------------------------------------------------------+
| +.+ +.+++.+ +.+ |
128000 +-+++.++. : +.+ : ++.+ +.++.+++.++.+ .+++.++.+ ++.++.++.+|
|+ ++ : : + + |
126000 +-+ : : |
| : : |
124000 +-+ :: |
| :: |
122000 +-+ :: |
| : |
120000 +-+ OO O OO O O |
| O O OO O OO O |
118000 O-+OO O |
|O O |
116000 +-+----------------------------------------------------------------+


netperf.workload

9.5e+08 +-+---------------------------------------------------------------+
9.4e+08 +-+ +.++ .++.+++ +.+ |
|+.++.++ : + : +.+++.++.++.+++.++.+++.++.++ +.+++.++.+|
9.3e+08 +-+ +.+ : : |
9.2e+08 +-+ : : |
| : : |
9.1e+08 +-+ : : |
9e+08 +-+ :: |
8.9e+08 +-+ : |
| : |
8.8e+08 +-+ OO OOO OO O |
8.7e+08 +-+ O OOO O OO |
O OO O |
8.6e+08 +O+ O |
8.5e+08 +-+---------------------------------------------------------------+


netperf.time.user_time

152 +-+-------------------------------------------------------------------+
150 +-+ + |
| :: |
148 +-+ :: |
146 +-+ +. + + + : :+. +.+|
144 +-++ + + .+ .++.+ .++.+ :+ +.++.++.+ :+ +.++. +.+ + + |
142 +-+ + .++ + +.++ +.+ + +.+ + + |
| + |
140 +-+ |
138 +-+ |
136 +-+ O |
134 +-+ O O OO O O O |
| O O O OO O O |
132 OO+OO OO |
130 +-+-------------------------------------------------------------------+


netperf.time.system_time

1840 +-+-O----------------------------------------------------------------+
|O O O |
1830 O-+ O O O O OO O OO |
| OO O O |
| O O O |
1820 +-+ |
| |
1810 +-+ |
| |
1800 +-+ |
| +.+ + +. +. +.+ .++.++. +. ++. +.++. .+ |
| .++.+ +. + +. + + : + + + + + + ++.+ .++ +.+|
1790 +-+ ++ + + + +: + |
| + + |
1780 +-+------------------------------------------------------------------+


netperf.time.voluntary_context_switches

1.12e+06 +-+--------------------------------------------------------------+
| + + + + + + +. .+ : + .+|
1.1e+06 +-+++. : + + : + + +.++ + + .++.+ :.+ : + .+++ :: +.+++.++ |
1.08e+06 +-+ + :.+ + : + + + + + + + |
| + + |
1.06e+06 +-+ |
| |
1.04e+06 +-+ |
| |
1.02e+06 +-+ OO O OO O |
1e+06 +-+ OO OO O O O |
|O O O |
980000 O-+OO O |
| |
960000 +-+----O---------------------------------------------------------+


[*] bisect-good sample
[O] bisect-bad sample

***************************************************************************************************
lkp-bdw-ep2: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 128G memory
=========================================================================================
cluster/compiler/cpufreq_governor/ip/kconfig/nr_threads/rootfs/runtime/tbox_group/test/testcase:
cs-localhost/gcc-7/performance/ipv4/x86_64-rhel-7.2/50%/debian-x86_64-2018-04-03.cgz/300s/lkp-bdw-ep2/UDP_RR/netperf

commit:
6da410d97f (" mlx5e-fixes-2018-09-05")
b99259a614 ("net/sock: move memory_allocated over to percpu_counter variables")

6da410d97ffa486e b99259a61450bb6403bbbbf279
---------------- --------------------------
fail:runs %reproduction fail:runs
| | |
:4 25% 1:4 dmesg.WARNING:at#for_ip_interrupt_entry/0x
1:4 -25% :4 dmesg.WARNING:at_ip_native_sched_clock/0x
11:4 -149% 5:4 perf-profile.calltrace.cycles-pp.dequeue_entity.dequeue_task_fair.__sched_text_start.schedule.schedule_timeout
4:4 -54% 2:4 perf-profile.calltrace.cycles-pp.switch_mm_irqs_off.switch_mm.__sched_text_start.schedule.schedule_timeout
31:4 -378% 16:4 perf-profile.children.cycles-pp.schedule_timeout
1:4 -12% 0:4 perf-profile.self.cycles-pp.schedule_timeout
%stddev %change %stddev
\ | \
3140615 ± 2% -49.3% 1593635 netperf.Throughput_total_tps
71377 ± 2% -49.3% 36218 netperf.Throughput_tps
6877 ± 4% +11.8% 7689 netperf.time.involuntary_context_switches
2795 -10.5% 2501 netperf.time.percent_of_cpu_this_job_got
7688 -6.4% 7194 netperf.time.system_time
727.56 ± 3% -53.7% 336.63 ± 6% netperf.time.user_time
9.335e+08 ± 3% -48.8% 4.781e+08 netperf.time.voluntary_context_switches
9.422e+08 ± 2% -49.3% 4.781e+08 netperf.workload
497.45 -10.7% 444.23 pmeter.Average_Active_Power
143.48 -43.2% 81.53 pmeter.performance_per_watt
25025 ± 50% -50.2% 12452 ± 98% proc-vmstat.numa_pte_updates
823677 -1.0% 815119 proc-vmstat.pgfault
14.68 +19.6 34.31 ± 2% mpstat.cpu.soft%
37.84 -21.0 16.88 ± 5% mpstat.cpu.sys%
4.73 ± 3% -2.4 2.30 ± 6% mpstat.cpu.usr%
231917 ± 7% +48.8% 345161 ± 22% numa-numastat.node0.numa_hit
458545 ± 3% -25.1% 343375 ± 24% numa-numastat.node1.local_node
467255 ± 4% -25.0% 350578 ± 22% numa-numastat.node1.numa_hit
53.00 -9.4% 48.00 vmstat.procs.r
12208228 ± 3% -48.6% 6279977 vmstat.system.cs
197402 -5.8% 185888 vmstat.system.in
9.422e+08 ± 2% -49.3% 4.781e+08 softirqs.NET_RX
2090351 ± 22% -23.7% 1595486 ± 3% softirqs.RCU
6824307 -13.3% 5915651 softirqs.SCHED
8055584 +10.9% 8929857 softirqs.TIMER
7.34e+09 +40.3% 1.03e+10 cpuidle.C1.time
1.833e+09 ± 3% -47.4% 9.638e+08 cpuidle.C1.usage
3.942e+08 ± 63% -68.5% 1.243e+08 ± 29% cpuidle.C6.time
558983 ± 49% -64.4% 199205 ± 18% cpuidle.C6.usage
1.824e+08 ± 3% -47.9% 95056831 cpuidle.POLL.time
34485351 ± 5% -82.4% 6062003 cpuidle.POLL.usage
1997 -14.5% 1708 turbostat.Avg_MHz
71.97 -10.8 61.14 turbostat.Busy%
1.833e+09 ± 3% -47.4% 9.638e+08 turbostat.C1
27.42 ± 2% +11.2 38.62 turbostat.C1%
556509 ± 50% -64.8% 196082 ± 18% turbostat.C6
1.46 ± 64% -1.0 0.45 ± 30% turbostat.C6%
27.21 ± 2% +40.7% 38.28 turbostat.CPU%c1
270.14 -13.5% 233.54 turbostat.PkgWatt
15.33 +2.1% 15.65 turbostat.RAMWatt
360260 ± 11% -57.2% 154078 ± 6% sched_debug.cfs_rq:/.MIN_vruntime.avg
2840994 -20.3% 2263123 sched_debug.cfs_rq:/.MIN_vruntime.max
922773 ± 6% -40.1% 552528 ± 3% sched_debug.cfs_rq:/.MIN_vruntime.stddev
360260 ± 11% -57.2% 154078 ± 6% sched_debug.cfs_rq:/.max_vruntime.avg
2840994 -20.3% 2263123 sched_debug.cfs_rq:/.max_vruntime.max
922773 ± 6% -40.1% 552528 ± 3% sched_debug.cfs_rq:/.max_vruntime.stddev
2804769 ± 2% -19.7% 2251378 sched_debug.cfs_rq:/.min_vruntime.avg
2905503 -20.3% 2314293 sched_debug.cfs_rq:/.min_vruntime.max
2731218 ± 3% -18.3% 2232433 sched_debug.cfs_rq:/.min_vruntime.min
29631 ± 28% -61.3% 11469 ± 21% sched_debug.cfs_rq:/.min_vruntime.stddev
0.58 ± 4% -9.9% 0.52 ± 3% sched_debug.cfs_rq:/.nr_running.avg
10.43 ± 9% -15.4% 8.83 ± 2% sched_debug.cfs_rq:/.runnable_load_avg.avg
29606 ± 28% -61.3% 11465 ± 21% sched_debug.cfs_rq:/.spread0.stddev
387.56 -21.9% 302.54 ± 4% sched_debug.cfs_rq:/.util_est_enqueued.avg
356998 ± 25% -45.1% 196064 ± 3% sched_debug.cpu.avg_idle.max
58785 ± 15% -30.2% 41011 ± 2% sched_debug.cpu.avg_idle.stddev
0.46 ± 15% -81.8% 0.08 ± 99% sched_debug.cpu.cpu_load[1].min
2.25 ± 8% -42.6% 1.29 ± 10% sched_debug.cpu.cpu_load[2].min
4.17 ± 14% -21.0% 3.29 ± 4% sched_debug.cpu.cpu_load[3].min
21059143 ± 3% -49.0% 10741980 sched_debug.cpu.nr_switches.avg
21578752 ± 2% -49.7% 10846007 sched_debug.cpu.nr_switches.max
20097461 ± 6% -47.8% 10493962 sched_debug.cpu.nr_switches.min
262381 ± 31% -79.3% 54430 ± 13% sched_debug.cpu.nr_switches.stddev
5.784e+12 ± 2% -27.2% 4.212e+12 perf-stat.branch-instructions
2.76 -0.8 1.96 perf-stat.branch-miss-rate%
1.599e+11 ± 2% -48.2% 8.276e+10 perf-stat.branch-misses
0.73 ± 8% +0.4 1.14 perf-stat.cache-miss-rate%
2.527e+09 ± 7% +11.0% 2.805e+09 perf-stat.cache-misses
3.482e+11 -29.1% 2.47e+11 perf-stat.cache-references
3.733e+09 ± 3% -48.8% 1.912e+09 perf-stat.context-switches
1.93 +20.0% 2.32 perf-stat.cpi
5.633e+13 -17.5% 4.646e+13 perf-stat.cpu-cycles
7.772e+09 ± 4% -43.6% 4.386e+09 ± 15% perf-stat.dTLB-load-misses
8.521e+12 ± 2% -33.9% 5.63e+12 perf-stat.dTLB-loads
0.00 ± 8% +0.0 0.00 ± 7% perf-stat.dTLB-store-miss-rate%
1.095e+08 ± 9% -13.5% 94683885 ± 7% perf-stat.dTLB-store-misses
5.49e+12 ± 2% -48.1% 2.851e+12 perf-stat.dTLB-stores
10.06 ± 5% +3.6 13.65 ± 14% perf-stat.iTLB-load-miss-rate%
9.169e+09 ± 5% -36.1% 5.859e+09 ± 16% perf-stat.iTLB-load-misses
8.203e+10 ± 3% -54.9% 3.696e+10 perf-stat.iTLB-loads
2.915e+13 ± 2% -31.3% 2.002e+13 perf-stat.instructions
0.52 -16.7% 0.43 perf-stat.ipc
801554 -1.1% 792718 perf-stat.minor-faults
7.603e+08 ± 5% +108.2% 1.583e+09 perf-stat.node-load-misses
93.23 ± 3% -35.4 57.88 ± 2% perf-stat.node-store-miss-rate%
1.072e+09 ± 5% -40.6% 6.373e+08 ± 2% perf-stat.node-store-misses
80515351 ± 49% +476.1% 4.638e+08 ± 3% perf-stat.node-stores
801557 -1.1% 792720 perf-stat.page-faults
30931 +35.4% 41869 perf-stat.path-length
15.38 -7.4 7.94 perf-profile.calltrace.cycles-pp.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
15.18 -7.3 7.85 perf-profile.calltrace.cycles-pp.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
13.40 -6.5 6.93 perf-profile.calltrace.cycles-pp.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
13.14 -6.3 6.82 perf-profile.calltrace.cycles-pp.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64
8.62 ± 6% -5.9 2.74 perf-profile.calltrace.cycles-pp.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
10.50 -5.1 5.43 perf-profile.calltrace.cycles-pp.__skb_recv_udp.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
8.98 ± 2% -4.4 4.62 perf-profile.calltrace.cycles-pp.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg.inet_recvmsg.__sys_recvfrom
5.03 ± 12% -4.2 0.88 ± 3% perf-profile.calltrace.cycles-pp.__ip_make_skb.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
9.64 -3.9 5.70 perf-profile.calltrace.cycles-pp.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish
9.42 -3.9 5.49 perf-profile.calltrace.cycles-pp.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv
4.54 ± 13% -3.9 0.65 ± 4% perf-profile.calltrace.cycles-pp.__ip_select_ident.__ip_make_skb.ip_make_skb.udp_sendmsg.sock_sendmsg
44.22 -3.9 40.32 perf-profile.calltrace.cycles-pp.secondary_startup_64
4.46 ± 13% -3.9 0.60 ± 5% perf-profile.calltrace.cycles-pp.ip_idents_reserve.__ip_select_ident.__ip_make_skb.ip_make_skb.udp_sendmsg
43.77 -3.8 39.95 perf-profile.calltrace.cycles-pp.start_secondary.secondary_startup_64
43.76 -3.8 39.94 perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.secondary_startup_64
7.84 ± 2% -3.8 4.06 perf-profile.calltrace.cycles-pp.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg.inet_recvmsg
43.66 -3.8 39.89 perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
8.92 -3.7 5.20 perf-profile.calltrace.cycles-pp.__wake_up_common.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb
7.58 ± 2% -3.7 3.91 perf-profile.calltrace.cycles-pp.schedule.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg
7.46 ± 2% -3.6 3.82 perf-profile.calltrace.cycles-pp.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp
8.60 -3.6 5.00 perf-profile.calltrace.cycles-pp.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb
8.49 -3.6 4.94 perf-profile.calltrace.cycles-pp.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable
6.10 -2.8 3.27 perf-profile.calltrace.cycles-pp.schedule_idle.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
5.95 -2.7 3.20 perf-profile.calltrace.cycles-pp.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry.start_secondary
4.93 -2.1 2.88 perf-profile.calltrace.cycles-pp.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
4.64 -1.9 2.69 perf-profile.calltrace.cycles-pp.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common
3.65 -1.8 1.84 perf-profile.calltrace.cycles-pp.menu_select.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
4.00 -1.7 2.31 perf-profile.calltrace.cycles-pp.enqueue_entity.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function
3.24 ± 2% -1.6 1.63 perf-profile.calltrace.cycles-pp.dequeue_task_fair.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets
3.27 -1.6 1.69 perf-profile.calltrace.cycles-pp.__ip_append_data.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
2.52 -1.3 1.19 perf-profile.calltrace.cycles-pp.__entry_SYSCALL_64_trampoline
2.48 ± 2% -1.3 1.20 perf-profile.calltrace.cycles-pp.tick_nohz_get_sleep_length.menu_select.do_idle.cpu_startup_entry.start_secondary
2.23 ± 4% -1.1 1.12 ± 8% perf-profile.calltrace.cycles-pp.__dev_queue_xmit.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb
2.24 -1.1 1.18 perf-profile.calltrace.cycles-pp.pick_next_task_fair.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry
2.16 ± 2% -1.0 1.17 perf-profile.calltrace.cycles-pp.sock_alloc_send_pskb.__ip_append_data.ip_make_skb.udp_sendmsg.sock_sendmsg
1.94 ± 2% -1.0 0.96 perf-profile.calltrace.cycles-pp.tick_nohz_next_event.tick_nohz_get_sleep_length.menu_select.do_idle.cpu_startup_entry
1.66 ± 5% -0.8 0.84 ± 10% perf-profile.calltrace.cycles-pp.dev_hard_start_xmit.__dev_queue_xmit.ip_finish_output2.ip_output.ip_send_skb
1.57 ± 5% -0.8 0.78 ± 10% perf-profile.calltrace.cycles-pp.loopback_xmit.dev_hard_start_xmit.__dev_queue_xmit.ip_finish_output2.ip_output
1.55 ± 2% -0.8 0.78 perf-profile.calltrace.cycles-pp.get_next_timer_interrupt.tick_nohz_next_event.tick_nohz_get_sleep_length.menu_select.do_idle
1.65 ± 2% -0.8 0.90 perf-profile.calltrace.cycles-pp.alloc_skb_with_frags.sock_alloc_send_pskb.__ip_append_data.ip_make_skb.udp_sendmsg
1.52 -0.7 0.77 perf-profile.calltrace.cycles-pp.syscall_return_via_sysret
1.57 ± 2% -0.7 0.85 perf-profile.calltrace.cycles-pp.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb.__ip_append_data.ip_make_skb
1.46 -0.7 0.79 perf-profile.calltrace.cycles-pp.set_next_entity.pick_next_task_fair.__sched_text_start.schedule_idle.do_idle
1.29 ± 5% -0.6 0.65 ± 7% perf-profile.calltrace.cycles-pp.ip_route_output_flow.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
0.89 ± 2% -0.6 0.25 ±100% perf-profile.calltrace.cycles-pp.resched_curr.check_preempt_curr.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function
1.26 ± 5% -0.6 0.62 ± 7% perf-profile.calltrace.cycles-pp.ip_route_output_key_hash.ip_route_output_flow.udp_sendmsg.sock_sendmsg.__sys_sendto
1.46 -0.6 0.86 perf-profile.calltrace.cycles-pp.update_load_avg.enqueue_entity.enqueue_task_fair.ttwu_do_activate.try_to_wake_up
1.20 ± 5% -0.6 0.60 ± 7% perf-profile.calltrace.cycles-pp.ip_route_output_key_hash_rcu.ip_route_output_key_hash.ip_route_output_flow.udp_sendmsg.sock_sendmsg
1.34 -0.6 0.77 perf-profile.calltrace.cycles-pp.switch_mm_irqs_off.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry
1.20 -0.6 0.63 ± 2% perf-profile.calltrace.cycles-pp.switch_mm.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets
1.00 ± 2% -0.5 0.52 perf-profile.calltrace.cycles-pp.move_addr_to_user.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
1.06 ± 2% -0.5 0.59 perf-profile.calltrace.cycles-pp.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
1.10 ± 3% -0.5 0.65 perf-profile.calltrace.cycles-pp.select_task_rq_fair.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.95 ± 3% -0.4 0.54 ± 2% perf-profile.calltrace.cycles-pp.check_preempt_curr.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function.__wake_up_common
30.84 +2.3 33.16 perf-profile.calltrace.cycles-pp.cpuidle_enter_state.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
29.71 +2.7 32.39 perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.do_idle.cpu_startup_entry.start_secondary
49.06 +7.3 56.32 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe
48.63 +7.5 56.11 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe
32.67 +15.2 47.87 perf-profile.calltrace.cycles-pp.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
32.48 +15.3 47.77 perf-profile.calltrace.cycles-pp.__sys_sendto.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
31.55 +15.7 47.30 perf-profile.calltrace.cycles-pp.sock_sendmsg.__sys_sendto.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
30.98 +16.0 47.02 perf-profile.calltrace.cycles-pp.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto.do_syscall_64
20.10 +23.1 43.17 perf-profile.calltrace.cycles-pp.udp_send_skb.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
19.82 +23.2 43.02 perf-profile.calltrace.cycles-pp.ip_send_skb.udp_send_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
19.32 +23.4 42.76 perf-profile.calltrace.cycles-pp.ip_output.ip_send_skb.udp_send_skb.udp_sendmsg.sock_sendmsg
17.83 +24.2 42.03 perf-profile.calltrace.cycles-pp.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb.udp_sendmsg
15.21 +25.5 40.72 perf-profile.calltrace.cycles-pp.__local_bh_enable_ip.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb
15.07 +25.6 40.64 perf-profile.calltrace.cycles-pp.do_softirq.__local_bh_enable_ip.ip_finish_output2.ip_output.ip_send_skb
14.83 +25.7 40.52 perf-profile.calltrace.cycles-pp.do_softirq_own_stack.do_softirq.__local_bh_enable_ip.ip_finish_output2.ip_output
14.73 +25.7 40.45 perf-profile.calltrace.cycles-pp.__softirqentry_text_start.do_softirq_own_stack.do_softirq.__local_bh_enable_ip.ip_finish_output2
14.31 +25.9 40.17 perf-profile.calltrace.cycles-pp.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq.__local_bh_enable_ip
13.95 +26.0 39.97 perf-profile.calltrace.cycles-pp.process_backlog.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq
13.17 +26.4 39.61 perf-profile.calltrace.cycles-pp.__netif_receive_skb_one_core.process_backlog.net_rx_action.__softirqentry_text_start.do_softirq_own_stack
12.85 +26.6 39.43 perf-profile.calltrace.cycles-pp.ip_rcv.__netif_receive_skb_one_core.process_backlog.net_rx_action.__softirqentry_text_start
12.43 +26.7 39.16 perf-profile.calltrace.cycles-pp.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core.process_backlog.net_rx_action
12.27 +26.8 39.07 perf-profile.calltrace.cycles-pp.ip_local_deliver_finish.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core.process_backlog
11.92 +27.0 38.90 perf-profile.calltrace.cycles-pp.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core
11.37 +27.2 38.58 perf-profile.calltrace.cycles-pp.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver.ip_rcv
10.29 +27.7 38.01 perf-profile.calltrace.cycles-pp.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver
0.00 +30.3 30.29 perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb
0.00 +30.5 30.48 perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb
0.00 +31.8 31.84 perf-profile.calltrace.cycles-pp.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv
0.00 +32.0 31.98 perf-profile.calltrace.cycles-pp.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish
15.40 -7.4 7.96 perf-profile.children.cycles-pp.__x64_sys_recvfrom
15.20 -7.3 7.85 perf-profile.children.cycles-pp.__sys_recvfrom
13.61 -6.5 7.12 perf-profile.children.cycles-pp.__sched_text_start
13.40 -6.5 6.94 perf-profile.children.cycles-pp.inet_recvmsg
13.15 -6.3 6.83 perf-profile.children.cycles-pp.udp_recvmsg
8.64 ± 6% -5.9 2.75 perf-profile.children.cycles-pp.ip_make_skb
10.53 -5.1 5.44 perf-profile.children.cycles-pp.__skb_recv_udp
8.98 ± 2% -4.4 4.63 perf-profile.children.cycles-pp.__skb_wait_for_more_packets
5.04 ± 12% -4.2 0.88 ± 3% perf-profile.children.cycles-pp.__ip_make_skb
9.65 -3.9 5.71 perf-profile.children.cycles-pp.sock_def_readable
9.43 -3.9 5.49 perf-profile.children.cycles-pp.__wake_up_common_lock
4.54 ± 13% -3.9 0.65 ± 4% perf-profile.children.cycles-pp.__ip_select_ident
44.22 -3.9 40.32 perf-profile.children.cycles-pp.secondary_startup_64
44.22 -3.9 40.32 perf-profile.children.cycles-pp.cpu_startup_entry
44.20 -3.9 40.31 perf-profile.children.cycles-pp.do_idle
4.46 ± 13% -3.9 0.60 ± 5% perf-profile.children.cycles-pp.ip_idents_reserve
43.77 -3.8 39.95 perf-profile.children.cycles-pp.start_secondary
8.93 -3.7 5.20 perf-profile.children.cycles-pp.__wake_up_common
7.59 ± 2% -3.7 3.92 perf-profile.children.cycles-pp.schedule
8.61 -3.6 5.01 perf-profile.children.cycles-pp.autoremove_wake_function
8.51 -3.6 4.95 perf-profile.children.cycles-pp.try_to_wake_up
6.16 -2.9 3.31 perf-profile.children.cycles-pp.schedule_idle
4.93 -2.1 2.88 perf-profile.children.cycles-pp.ttwu_do_activate
4.64 -1.9 2.69 perf-profile.children.cycles-pp.enqueue_task_fair
3.70 -1.8 1.87 perf-profile.children.cycles-pp.menu_select
4.09 -1.7 2.36 perf-profile.children.cycles-pp.enqueue_entity
3.32 ± 2% -1.7 1.67 perf-profile.children.cycles-pp.dequeue_task_fair
3.28 -1.6 1.69 perf-profile.children.cycles-pp.__ip_append_data
2.98 -1.5 1.44 ± 2% perf-profile.children.cycles-pp.__entry_SYSCALL_64_trampoline
3.02 ± 2% -1.5 1.51 perf-profile.children.cycles-pp.dequeue_entity
2.93 -1.3 1.61 perf-profile.children.cycles-pp.update_load_avg
2.78 -1.3 1.47 perf-profile.children.cycles-pp.pick_next_task_fair
2.51 -1.3 1.22 perf-profile.children.cycles-pp.tick_nohz_get_sleep_length
2.52 -1.1 1.38 perf-profile.children.cycles-pp.switch_mm_irqs_off
2.24 ± 4% -1.1 1.13 ± 8% perf-profile.children.cycles-pp.__dev_queue_xmit
1.97 ± 2% -1.0 0.97 perf-profile.children.cycles-pp.tick_nohz_next_event
2.16 -1.0 1.18 perf-profile.children.cycles-pp.sock_alloc_send_pskb
1.74 -0.8 0.89 perf-profile.children.cycles-pp.syscall_return_via_sysret
1.67 ± 4% -0.8 0.84 ± 10% perf-profile.children.cycles-pp.dev_hard_start_xmit
1.58 ± 5% -0.8 0.79 ± 11% perf-profile.children.cycles-pp.loopback_xmit
1.57 ± 2% -0.8 0.79 perf-profile.children.cycles-pp.get_next_timer_interrupt
1.46 -0.8 0.70 perf-profile.children.cycles-pp._raw_spin_lock
1.65 ± 2% -0.8 0.90 perf-profile.children.cycles-pp.alloc_skb_with_frags
1.58 ± 2% -0.7 0.85 perf-profile.children.cycles-pp.__alloc_skb
1.49 -0.7 0.80 perf-profile.children.cycles-pp.set_next_entity
1.36 ± 2% -0.7 0.67 perf-profile.children.cycles-pp.load_new_mm_cr3
1.29 ± 5% -0.6 0.65 ± 7% perf-profile.children.cycles-pp.ip_route_output_flow
1.26 ± 5% -0.6 0.63 ± 7% perf-profile.children.cycles-pp.ip_route_output_key_hash
1.21 ± 5% -0.6 0.60 ± 7% perf-profile.children.cycles-pp.ip_route_output_key_hash_rcu
1.24 ± 2% -0.6 0.65 perf-profile.children.cycles-pp.switch_mm
1.17 -0.6 0.59 perf-profile.children.cycles-pp.update_curr
1.10 ± 3% -0.6 0.54 ± 3% perf-profile.children.cycles-pp.nf_hook_slow
1.08 ± 3% -0.6 0.52 perf-profile.children.cycles-pp.__next_timer_interrupt
1.09 -0.5 0.60 ± 2% perf-profile.children.cycles-pp.__update_load_avg_cfs_rq
0.95 ± 2% -0.5 0.45 ± 3% perf-profile.children.cycles-pp.___perf_sw_event
1.01 ± 2% -0.5 0.52 perf-profile.children.cycles-pp.move_addr_to_user
1.06 ± 2% -0.5 0.59 perf-profile.children.cycles-pp.ttwu_do_wakeup
1.10 ± 3% -0.5 0.65 perf-profile.children.cycles-pp.select_task_rq_fair
1.02 ± 2% -0.4 0.57 perf-profile.children.cycles-pp.check_preempt_curr
0.83 ± 2% -0.4 0.41 ± 2% perf-profile.children.cycles-pp.__switch_to_asm
0.87 ± 2% -0.4 0.46 ± 3% perf-profile.children.cycles-pp._copy_to_iter
0.77 ± 5% -0.4 0.37 ± 3% perf-profile.children.cycles-pp.selinux_ip_postroute
0.84 -0.4 0.45 ± 3% perf-profile.children.cycles-pp.sk_filter_trim_cap
0.89 ± 2% -0.4 0.50 ± 2% perf-profile.children.cycles-pp.resched_curr
0.87 ± 2% -0.4 0.48 perf-profile.children.cycles-pp.__consume_stateless_skb
0.75 ± 2% -0.4 0.36 ± 2% perf-profile.children.cycles-pp.__switch_to
0.79 -0.4 0.42 ± 2% perf-profile.children.cycles-pp.security_sock_rcv_skb
0.72 ± 7% -0.4 0.36 ± 3% perf-profile.children.cycles-pp.update_cfs_group
0.64 ± 2% -0.4 0.29 perf-profile.children.cycles-pp.ip_generic_getfrag
0.73 ± 3% -0.4 0.38 ± 2% perf-profile.children.cycles-pp.tick_nohz_idle_exit
0.72 -0.3 0.37 ± 3% perf-profile.children.cycles-pp.finish_task_switch
0.73 -0.3 0.39 ± 4% perf-profile.children.cycles-pp.sock_wfree
0.74 -0.3 0.40 ± 3% perf-profile.children.cycles-pp.selinux_socket_sock_rcv_skb
0.69 ± 10% -0.3 0.35 ± 12% perf-profile.children.cycles-pp.fib_table_lookup
0.72 ± 2% -0.3 0.39 ± 3% perf-profile.children.cycles-pp.copy_user_generic_unrolled
0.62 ± 5% -0.3 0.30 ± 4% perf-profile.children.cycles-pp.selinux_ip_postroute_compat
0.73 -0.3 0.42 perf-profile.children.cycles-pp._raw_spin_lock_bh
0.67 ± 2% -0.3 0.35 ± 3% perf-profile.children.cycles-pp.__slab_free
0.53 ± 2% -0.3 0.23 ± 3% perf-profile.children.cycles-pp._copy_from_iter_full
0.61 ± 2% -0.3 0.33 ± 2% perf-profile.children.cycles-pp.sched_clock_cpu
0.69 ± 3% -0.3 0.40 perf-profile.children.cycles-pp.poll_idle
0.61 ± 2% -0.3 0.34 ± 5% perf-profile.children.cycles-pp.account_entity_enqueue
0.56 ± 2% -0.3 0.30 ± 3% perf-profile.children.cycles-pp.sched_clock
0.47 ± 21% -0.3 0.20 ± 30% perf-profile.children.cycles-pp.netif_rx_internal
0.51 ± 3% -0.3 0.25 ± 4% perf-profile.children.cycles-pp.sockfd_lookup_light
0.54 ± 2% -0.3 0.28 ± 2% perf-profile.children.cycles-pp.native_sched_clock
0.46 ± 4% -0.2 0.21 ± 3% perf-profile.children.cycles-pp.pick_next_task_idle
0.55 -0.2 0.30 ± 4% perf-profile.children.cycles-pp.ktime_get
0.48 ± 3% -0.2 0.24 perf-profile.children.cycles-pp.__might_fault
0.43 ± 4% -0.2 0.18 ± 2% perf-profile.children.cycles-pp.hrtimer_next_event_without
0.54 -0.2 0.30 ± 2% perf-profile.children.cycles-pp.reweight_entity
0.53 -0.2 0.29 perf-profile.children.cycles-pp.prepare_to_wait_exclusive
0.56 ± 2% -0.2 0.33 perf-profile.children.cycles-pp.__update_load_avg_se
0.72 ± 4% -0.2 0.51 perf-profile.children.cycles-pp.find_next_bit
0.46 ± 3% -0.2 0.24 perf-profile.children.cycles-pp.tick_nohz_idle_enter
0.41 ± 10% -0.2 0.19 ± 7% perf-profile.children.cycles-pp.avc_has_perm
0.41 ± 2% -0.2 0.20 ± 4% perf-profile.children.cycles-pp.__fget_light
0.43 ± 10% -0.2 0.21 ± 11% perf-profile.children.cycles-pp.read_tsc
0.51 ± 3% -0.2 0.30 ± 2% perf-profile.children.cycles-pp.update_rq_clock
0.48 -0.2 0.27 ± 3% perf-profile.children.cycles-pp.selinux_sock_rcv_skb_compat
0.45 ± 2% -0.2 0.25 perf-profile.children.cycles-pp.native_write_msr
0.46 ± 2% -0.2 0.26 ± 4% perf-profile.children.cycles-pp.sock_def_write_space
0.38 -0.2 0.18 ± 2% perf-profile.children.cycles-pp.kmem_cache_alloc_node
0.49 ± 2% -0.2 0.30 ± 2% perf-profile.children.cycles-pp.__kmalloc_reserve
0.42 ± 2% -0.2 0.23 ± 2% perf-profile.children.cycles-pp.update_ts_time_stats
0.46 ± 4% -0.2 0.27 ± 3% perf-profile.children.cycles-pp.copyout
0.40 ± 2% -0.2 0.21 ± 4% perf-profile.children.cycles-pp.ip_local_out
0.42 -0.2 0.23 perf-profile.children.cycles-pp.skb_set_owner_w
0.36 -0.2 0.18 ± 4% perf-profile.children.cycles-pp.pick_next_entity
0.36 ± 3% -0.2 0.18 ± 4% perf-profile.children.cycles-pp.__ip_local_out
0.33 ± 2% -0.2 0.16 ± 5% perf-profile.children.cycles-pp.validate_xmit_skb
0.32 ± 2% -0.2 0.15 ± 3% perf-profile.children.cycles-pp.enqueue_to_backlog
0.31 ± 3% -0.2 0.15 ± 3% perf-profile.children.cycles-pp.put_prev_task_fair
0.39 ± 7% -0.2 0.23 ± 4% perf-profile.children.cycles-pp.ksize
0.44 -0.2 0.28 perf-profile.children.cycles-pp.__kmalloc_node_track_caller
0.41 ± 7% -0.2 0.26 perf-profile.children.cycles-pp.select_idle_sibling
0.34 ± 2% -0.2 0.19 ± 6% perf-profile.children.cycles-pp.ipv4_mtu
0.30 ± 2% -0.1 0.15 ± 3% perf-profile.children.cycles-pp.___might_sleep
0.35 ± 3% -0.1 0.21 ± 2% perf-profile.children.cycles-pp.nr_iowait_cpu
0.28 -0.1 0.14 ± 3% perf-profile.children.cycles-pp.__skb_try_recv_from_queue
0.27 ± 5% -0.1 0.12 ± 8% perf-profile.children.cycles-pp.security_socket_sendmsg
0.29 ± 2% -0.1 0.15 ± 5% perf-profile.children.cycles-pp.__udp4_lib_lookup
0.24 ± 3% -0.1 0.10 perf-profile.children.cycles-pp.copyin
0.32 ± 3% -0.1 0.18 ± 3% perf-profile.children.cycles-pp.hrtimer_get_next_event
0.25 ± 4% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.kmem_cache_free
0.24 ± 2% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.entry_SYSCALL_64_stage2
0.28 ± 2% -0.1 0.15 ± 2% perf-profile.children.cycles-pp.move_addr_to_kernel
0.27 ± 4% -0.1 0.14 ± 3% perf-profile.children.cycles-pp.sock_has_perm
0.28 -0.1 0.15 ± 5% perf-profile.children.cycles-pp.selinux_parse_skb
0.35 ± 3% -0.1 0.23 ± 3% perf-profile.children.cycles-pp.available_idle_cpu
0.25 -0.1 0.12 ± 4% perf-profile.children.cycles-pp._copy_from_user
0.27 ± 20% -0.1 0.15 ± 7% perf-profile.children.cycles-pp.__netif_receive_skb_core
0.29 ± 2% -0.1 0.17 ± 2% perf-profile.children.cycles-pp.__list_del_entry_valid
0.20 ± 8% -0.1 0.08 ± 5% perf-profile.children.cycles-pp.__hrtimer_next_event_base
0.22 ± 4% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.account_entity_dequeue
0.32 -0.1 0.20 ± 5% perf-profile.children.cycles-pp.__enqueue_entity
0.29 ± 2% -0.1 0.17 ± 2% perf-profile.children.cycles-pp.skb_release_data
0.22 ± 3% -0.1 0.11 perf-profile.children.cycles-pp._copy_to_user
0.20 ± 4% -0.1 0.09 ± 5% perf-profile.children.cycles-pp.menu_reflect
0.21 ± 2% -0.1 0.10 ± 4% perf-profile.children.cycles-pp.put_prev_entity
0.23 -0.1 0.12 ± 3% perf-profile.children.cycles-pp.cpuidle_governor_latency_req
0.22 ± 3% -0.1 0.11 ± 3% perf-profile.children.cycles-pp.copy_user_enhanced_fast_string
0.22 ± 3% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.rcu_idle_exit
0.21 ± 2% -0.1 0.10 ± 4% perf-profile.children.cycles-pp.security_sk_classify_flow
0.21 ± 3% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.sock_recvmsg
0.18 ± 3% -0.1 0.08 ± 5% perf-profile.children.cycles-pp.__get_user_4
0.51 ± 3% -0.1 0.41 ± 2% perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
0.22 -0.1 0.12 ± 8% perf-profile.children.cycles-pp.ip_setup_cork
0.19 ± 4% -0.1 0.09 ± 4% perf-profile.children.cycles-pp.inet_sendmsg
0.21 ± 3% -0.1 0.11 ± 3% perf-profile.children.cycles-pp.__might_sleep
0.20 ± 2% -0.1 0.10 perf-profile.children.cycles-pp.__calc_delta
0.18 ± 2% -0.1 0.09 ± 4% perf-profile.children.cycles-pp.security_socket_recvmsg
0.18 ± 2% -0.1 0.08 ± 5% perf-profile.children.cycles-pp.call_cpuidle
0.23 ± 2% -0.1 0.14 ± 8% perf-profile.children.cycles-pp.__list_add_valid
0.12 ± 4% -0.1 0.04 ± 57% perf-profile.children.cycles-pp.sched_ttwu_pending
0.19 ± 3% -0.1 0.10 ± 4% perf-profile.children.cycles-pp.rcu_eqs_enter
0.23 ± 3% -0.1 0.15 ± 4% perf-profile.children.cycles-pp.activate_task
0.18 ± 2% -0.1 0.10 ± 7% perf-profile.children.cycles-pp.netif_skb_features
0.18 ± 4% -0.1 0.10 ± 4% perf-profile.children.cycles-pp.rb_erase_cached
0.15 ± 2% -0.1 0.07 ± 5% perf-profile.children.cycles-pp.rcu_needs_cpu
0.17 ± 4% -0.1 0.11 ± 4% perf-profile.children.cycles-pp.ip_finish_output
0.16 ± 2% -0.1 0.10 perf-profile.children.cycles-pp.ip_rcv_core
0.13 ± 3% -0.1 0.07 ± 5% perf-profile.children.cycles-pp.selinux_ipv4_output
0.13 -0.1 0.07 perf-profile.children.cycles-pp.selinux_sk_getsecid
0.11 ± 6% -0.1 0.05 perf-profile.children.cycles-pp.update_min_vruntime
0.08 ± 5% -0.1 0.03 ±100% perf-profile.children.cycles-pp.ip_send_check
0.12 ± 9% -0.1 0.06 ± 11% perf-profile.children.cycles-pp.compute_score
0.10 ± 5% -0.1 0.04 ± 57% perf-profile.children.cycles-pp.deactivate_task
0.11 ± 6% -0.1 0.05 ± 8% perf-profile.children.cycles-pp.ipv4_pktinfo_prepare
0.11 -0.1 0.06 ± 9% perf-profile.children.cycles-pp.dst_release
0.08 ± 8% -0.1 0.03 ±100% perf-profile.children.cycles-pp.__slab_alloc
0.11 ± 4% -0.1 0.06 ± 11% perf-profile.children.cycles-pp.pm_qos_request
0.10 ± 4% -0.1 0.05 perf-profile.children.cycles-pp.udp_rcv
0.10 ± 8% -0.1 0.05 perf-profile.children.cycles-pp.__update_idle_core
0.13 ± 10% -0.1 0.08 ± 6% perf-profile.children.cycles-pp.import_single_range
0.10 ± 4% -0.0 0.05 perf-profile.children.cycles-pp.rcu_eqs_exit
0.11 ± 7% -0.0 0.07 perf-profile.children.cycles-pp.rb_insert_color_cached
0.08 ± 8% -0.0 0.04 ± 58% perf-profile.children.cycles-pp.tick_check_broadcast_expired
0.13 -0.0 0.10 ± 4% perf-profile.children.cycles-pp.irq_exit
0.07 ± 5% -0.0 0.05 perf-profile.children.cycles-pp.rebalance_domains
0.07 ± 5% -0.0 0.05 ± 9% perf-profile.children.cycles-pp.receiver_wake_function
0.00 +0.1 0.10 ± 5% perf-profile.children.cycles-pp.percpu_counter_add_batch
0.00 +0.3 0.28 perf-profile.children.cycles-pp.cpumask_next
31.18 +2.3 33.50 perf-profile.children.cycles-pp.cpuidle_enter_state
30.02 +2.7 32.70 perf-profile.children.cycles-pp.intel_idle
49.12 +7.3 56.39 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
48.68 +7.5 56.17 perf-profile.children.cycles-pp.do_syscall_64
32.69 +15.2 47.88 perf-profile.children.cycles-pp.__x64_sys_sendto
32.49 +15.3 47.77 perf-profile.children.cycles-pp.__sys_sendto
31.56 +15.7 47.30 perf-profile.children.cycles-pp.sock_sendmsg
31.00 +16.0 47.03 perf-profile.children.cycles-pp.udp_sendmsg
20.10 +23.1 43.18 perf-profile.children.cycles-pp.udp_send_skb
19.82 +23.2 43.03 perf-profile.children.cycles-pp.ip_send_skb
19.33 +23.4 42.77 perf-profile.children.cycles-pp.ip_output
17.85 +24.2 42.05 perf-profile.children.cycles-pp.ip_finish_output2
15.28 +25.5 40.76 perf-profile.children.cycles-pp.__local_bh_enable_ip
15.10 +25.6 40.66 perf-profile.children.cycles-pp.do_softirq
14.84 +25.7 40.52 perf-profile.children.cycles-pp.do_softirq_own_stack
14.85 +25.7 40.54 perf-profile.children.cycles-pp.__softirqentry_text_start
14.32 +25.9 40.18 perf-profile.children.cycles-pp.net_rx_action
13.96 +26.0 39.98 perf-profile.children.cycles-pp.process_backlog
13.18 +26.4 39.61 perf-profile.children.cycles-pp.__netif_receive_skb_one_core
12.86 +26.6 39.44 perf-profile.children.cycles-pp.ip_rcv
12.43 +26.7 39.16 perf-profile.children.cycles-pp.ip_local_deliver
12.27 +26.8 39.08 perf-profile.children.cycles-pp.ip_local_deliver_finish
11.93 +27.0 38.90 perf-profile.children.cycles-pp.__udp4_lib_rcv
11.38 +27.2 38.59 perf-profile.children.cycles-pp.udp_queue_rcv_skb
10.30 +27.7 38.02 perf-profile.children.cycles-pp.__udp_enqueue_schedule_skb
1.43 ± 3% +29.9 31.29 perf-profile.children.cycles-pp._raw_spin_lock_irqsave
0.06 ± 87% +30.2 30.31 perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
0.15 ± 3% +31.8 31.98 perf-profile.children.cycles-pp.__sk_mem_raise_allocated
0.00 +31.9 31.87 perf-profile.children.cycles-pp.__percpu_counter_sum
4.44 ± 13% -3.8 0.60 ± 5% perf-profile.self.cycles-pp.ip_idents_reserve
2.90 -1.5 1.39 perf-profile.self.cycles-pp.__entry_SYSCALL_64_trampoline
2.17 -1.0 1.20 perf-profile.self.cycles-pp.__sched_text_start
1.74 -0.8 0.89 perf-profile.self.cycles-pp.syscall_return_via_sysret
1.39 -0.7 0.67 perf-profile.self.cycles-pp._raw_spin_lock
1.36 ± 2% -0.7 0.67 perf-profile.self.cycles-pp.load_new_mm_cr3
1.25 -0.6 0.67 perf-profile.self.cycles-pp.update_load_avg
1.08 ± 2% -0.5 0.59 ± 2% perf-profile.self.cycles-pp.__update_load_avg_cfs_rq
0.87 ± 2% -0.5 0.41 ± 3% perf-profile.self.cycles-pp.___perf_sw_event
1.16 -0.4 0.71 perf-profile.self.cycles-pp.switch_mm_irqs_off
0.83 ± 2% -0.4 0.41 ± 2% perf-profile.self.cycles-pp.__switch_to_asm
0.85 -0.4 0.43 ± 2% perf-profile.self.cycles-pp.update_curr
1.38 ± 2% -0.4 0.98 perf-profile.self.cycles-pp._raw_spin_lock_irqsave
0.89 ± 2% -0.4 0.49 perf-profile.self.cycles-pp.menu_select
0.71 ± 2% -0.4 0.35 ± 2% perf-profile.self.cycles-pp.__switch_to
0.79 -0.4 0.43 perf-profile.self.cycles-pp.do_idle
0.82 ± 3% -0.4 0.46 ± 2% perf-profile.self.cycles-pp.resched_curr
0.68 ± 2% -0.4 0.32 ± 3% perf-profile.self.cycles-pp.udp_sendmsg
0.70 ± 7% -0.3 0.35 ± 3% perf-profile.self.cycles-pp.update_cfs_group
0.68 ± 11% -0.3 0.35 ± 12% perf-profile.self.cycles-pp.fib_table_lookup
0.71 -0.3 0.41 perf-profile.self.cycles-pp._raw_spin_lock_bh
0.68 ± 3% -0.3 0.38 ± 2% perf-profile.self.cycles-pp.copy_user_generic_unrolled
0.66 ± 2% -0.3 0.35 ± 3% perf-profile.self.cycles-pp.__slab_free
0.62 -0.3 0.32 ± 2% perf-profile.self.cycles-pp.pick_next_task_fair
0.67 ± 2% -0.3 0.38 perf-profile.self.cycles-pp.select_task_rq_fair
0.65 -0.3 0.36 perf-profile.self.cycles-pp.set_next_entity
0.59 -0.3 0.32 perf-profile.self.cycles-pp.udp_recvmsg
0.57 -0.3 0.30 ± 2% perf-profile.self.cycles-pp.finish_task_switch
0.65 -0.3 0.39 perf-profile.self.cycles-pp.enqueue_entity
0.51 ± 2% -0.3 0.26 ± 3% perf-profile.self.cycles-pp.do_syscall_64
0.49 -0.3 0.24 ± 3% perf-profile.self.cycles-pp.ip_route_output_key_hash_rcu
0.52 ± 2% -0.2 0.27 ± 2% perf-profile.self.cycles-pp.native_sched_clock
0.54 ± 3% -0.2 0.29 ± 2% perf-profile.self.cycles-pp.dequeue_entity
0.54 -0.2 0.30 ± 2% perf-profile.self.cycles-pp.reweight_entity
0.45 ± 2% -0.2 0.21 ± 7% perf-profile.self.cycles-pp.__ip_append_data
0.47 ± 3% -0.2 0.24 perf-profile.self.cycles-pp.__next_timer_interrupt
0.57 ± 3% -0.2 0.34 ± 3% perf-profile.self.cycles-pp.poll_idle
0.42 ± 2% -0.2 0.19 ± 3% perf-profile.self.cycles-pp.__skb_wait_for_more_packets
0.54 ± 2% -0.2 0.32 perf-profile.self.cycles-pp.__update_load_avg_se
0.44 -0.2 0.21 ± 2% perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
0.39 ± 2% -0.2 0.16 ± 6% perf-profile.self.cycles-pp.ip_output
0.55 -0.2 0.33 ± 2% perf-profile.self.cycles-pp.enqueue_task_fair
0.41 ± 3% -0.2 0.20 ± 4% perf-profile.self.cycles-pp.__fget_light
0.40 ± 10% -0.2 0.18 ± 6% perf-profile.self.cycles-pp.avc_has_perm
0.48 -0.2 0.28 perf-profile.self.cycles-pp.move_addr_to_user
0.45 -0.2 0.24 ± 2% perf-profile.self.cycles-pp.native_write_msr
0.40 ± 10% -0.2 0.20 ± 13% perf-profile.self.cycles-pp.read_tsc
0.36 ± 4% -0.2 0.16 ± 4% perf-profile.self.cycles-pp.__skb_recv_udp
0.45 ± 2% -0.2 0.25 ± 5% perf-profile.self.cycles-pp.sock_def_write_space
0.42 ± 2% -0.2 0.23 ± 2% perf-profile.self.cycles-pp.account_entity_enqueue
0.38 ± 5% -0.2 0.19 ± 2% perf-profile.self.cycles-pp.dequeue_task_fair
0.39 ± 8% -0.2 0.20 ± 3% perf-profile.self.cycles-pp.ip_finish_output2
0.41 -0.2 0.23 perf-profile.self.cycles-pp.skb_set_owner_w
0.60 ± 4% -0.2 0.42 perf-profile.self.cycles-pp.find_next_bit
0.34 -0.2 0.17 ± 3% perf-profile.self.cycles-pp.__ip_make_skb
0.36 ± 3% -0.2 0.19 ± 2% perf-profile.self.cycles-pp.process_backlog
0.45 ± 4% -0.2 0.28 perf-profile.self.cycles-pp._raw_spin_unlock_irqrestore
0.31 ± 2% -0.2 0.15 ± 2% perf-profile.self.cycles-pp.__alloc_skb
0.33 -0.2 0.17 ± 4% perf-profile.self.cycles-pp.pick_next_entity
0.38 ± 6% -0.2 0.22 ± 3% perf-profile.self.cycles-pp.ksize
0.38 ± 4% -0.2 0.22 ± 3% perf-profile.self.cycles-pp.__softirqentry_text_start
0.31 ± 2% -0.2 0.15 ± 2% perf-profile.self.cycles-pp.__sys_sendto
0.29 ± 3% -0.2 0.14 ± 8% perf-profile.self.cycles-pp._copy_to_iter
0.30 ± 3% -0.1 0.15 perf-profile.self.cycles-pp.___might_sleep
0.27 -0.1 0.12 ± 5% perf-profile.self.cycles-pp.sock_wfree
0.40 ± 2% -0.1 0.26 ± 3% perf-profile.self.cycles-pp.__udp_enqueue_schedule_skb
0.27 ± 3% -0.1 0.13 ± 3% perf-profile.self.cycles-pp.__sys_recvfrom
0.31 -0.1 0.17 ± 6% perf-profile.self.cycles-pp.ipv4_mtu
0.34 ± 3% -0.1 0.21 ± 2% perf-profile.self.cycles-pp.nr_iowait_cpu
0.27 ± 4% -0.1 0.13 ± 3% perf-profile.self.cycles-pp.sock_has_perm
0.29 -0.1 0.16 ± 5% perf-profile.self.cycles-pp.net_rx_action
0.24 ± 3% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.entry_SYSCALL_64_stage2
0.28 ± 2% -0.1 0.15 ± 4% perf-profile.self.cycles-pp.loopback_xmit
0.24 ± 3% -0.1 0.11 ± 4% perf-profile.self.cycles-pp.inet_recvmsg
0.27 -0.1 0.14 ± 3% perf-profile.self.cycles-pp.do_softirq
0.27 -0.1 0.15 ± 5% perf-profile.self.cycles-pp.selinux_parse_skb
0.27 ± 20% -0.1 0.15 ± 5% perf-profile.self.cycles-pp.__netif_receive_skb_core
0.35 ± 4% -0.1 0.23 perf-profile.self.cycles-pp.available_idle_cpu
0.31 ± 2% -0.1 0.19 ± 3% perf-profile.self.cycles-pp.__enqueue_entity
0.24 -0.1 0.12 ± 3% perf-profile.self.cycles-pp.selinux_socket_sock_rcv_skb
0.29 ± 3% -0.1 0.17 ± 4% perf-profile.self.cycles-pp.__list_del_entry_valid
0.28 ± 3% -0.1 0.17 ± 3% perf-profile.self.cycles-pp.skb_release_data
0.27 ± 2% -0.1 0.16 ± 4% perf-profile.self.cycles-pp.__udp4_lib_rcv
0.21 ± 5% -0.1 0.10 ± 5% perf-profile.self.cycles-pp.account_entity_dequeue
0.21 ± 2% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.kmem_cache_alloc_node
0.18 ± 4% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.tick_nohz_next_event
0.23 -0.1 0.12 ± 4% perf-profile.self.cycles-pp.udp_send_skb
0.21 ± 3% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.copy_user_enhanced_fast_string
0.26 -0.1 0.16 ± 4% perf-profile.self.cycles-pp.try_to_wake_up
0.20 ± 4% -0.1 0.09 ± 4% perf-profile.self.cycles-pp.put_prev_task_fair
0.24 ± 3% -0.1 0.14 ± 3% perf-profile.self.cycles-pp.__wake_up_common
0.20 ± 4% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.__x64_sys_recvfrom
0.18 ± 3% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.selinux_ip_postroute_compat
0.18 ± 2% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.__get_user_4
0.20 ± 8% -0.1 0.11 ± 4% perf-profile.self.cycles-pp.__dev_queue_xmit
0.26 ± 5% -0.1 0.16 ± 2% perf-profile.self.cycles-pp.update_rq_clock
0.26 -0.1 0.16 ± 4% perf-profile.self.cycles-pp.__kmalloc_node_track_caller
0.20 ± 2% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.__local_bh_enable_ip
0.19 ± 3% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.enqueue_to_backlog
0.18 ± 2% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.call_cpuidle
0.22 ± 3% -0.1 0.13 ± 9% perf-profile.self.cycles-pp.__list_add_valid
0.19 ± 3% -0.1 0.10 perf-profile.self.cycles-pp.__calc_delta
0.17 ± 2% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.inet_sendmsg
0.20 ± 4% -0.1 0.11 ± 6% perf-profile.self.cycles-pp.__x64_sys_sendto
0.19 ± 3% -0.1 0.10 ± 4% perf-profile.self.cycles-pp.rcu_eqs_enter
0.12 ± 4% -0.1 0.04 ± 57% perf-profile.self.cycles-pp.sched_ttwu_pending
0.23 ± 3% -0.1 0.15 ± 4% perf-profile.self.cycles-pp.activate_task
0.19 ± 4% -0.1 0.11 ± 4% perf-profile.self.cycles-pp.check_preempt_curr
0.18 ± 2% -0.1 0.10 perf-profile.self.cycles-pp.__might_sleep
0.18 ± 2% -0.1 0.09 ± 4% perf-profile.self.cycles-pp.__udp4_lib_lookup
0.19 ± 6% -0.1 0.11 ± 18% perf-profile.self.cycles-pp.ktime_get
0.14 ± 3% -0.1 0.06 ± 15% perf-profile.self.cycles-pp.validate_xmit_skb
0.16 ± 2% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.selinux_sock_rcv_skb_compat
0.15 ± 4% -0.1 0.07 ± 5% perf-profile.self.cycles-pp.selinux_ip_postroute
0.13 ± 3% -0.1 0.05 perf-profile.self.cycles-pp._copy_from_iter_full
0.15 ± 3% -0.1 0.07 ± 10% perf-profile.self.cycles-pp.rcu_needs_cpu
0.16 ± 2% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.prepare_to_wait_exclusive
0.17 ± 5% -0.1 0.09 ± 4% perf-profile.self.cycles-pp.ip_local_deliver_finish
0.14 ± 5% -0.1 0.07 ± 10% perf-profile.self.cycles-pp.netif_skb_features
0.10 ± 4% -0.1 0.03 ±100% perf-profile.self.cycles-pp.put_prev_entity
0.14 ± 3% -0.1 0.07 ± 7% perf-profile.self.cycles-pp.__might_fault
0.14 ± 3% -0.1 0.07 ± 6% perf-profile.self.cycles-pp.menu_reflect
0.16 ± 5% -0.1 0.09 perf-profile.self.cycles-pp.rb_erase_cached
0.16 ± 2% -0.1 0.10 ± 5% perf-profile.self.cycles-pp.ip_rcv_core
0.15 ± 3% -0.1 0.08 ± 5% perf-profile.self.cycles-pp.ip_rcv
0.09 ± 4% -0.1 0.03 ±100% perf-profile.self.cycles-pp.udp_rcv
0.15 -0.1 0.08 ± 5% perf-profile.self.cycles-pp.ip_local_deliver
0.13 ± 3% -0.1 0.07 ± 6% perf-profile.self.cycles-pp.__hrtimer_next_event_base
0.09 -0.1 0.03 ±100% perf-profile.self.cycles-pp.selinux_ipv4_output
0.09 ± 7% -0.1 0.03 ±100% perf-profile.self.cycles-pp.deactivate_task
0.12 ± 3% -0.1 0.06 ± 6% perf-profile.self.cycles-pp.rcu_idle_exit
0.12 -0.1 0.06 perf-profile.self.cycles-pp.selinux_sk_getsecid
0.11 ± 4% -0.1 0.05 perf-profile.self.cycles-pp.ip_make_skb
0.10 ± 11% -0.1 0.04 ± 57% perf-profile.self.cycles-pp.__update_idle_core
0.08 -0.1 0.03 ±100% perf-profile.self.cycles-pp.ip_send_check
0.11 ± 4% -0.1 0.06 ± 11% perf-profile.self.cycles-pp.pm_qos_request
0.11 ± 7% -0.1 0.06 ± 11% perf-profile.self.cycles-pp.compute_score
0.11 ± 7% -0.1 0.05 ± 9% perf-profile.self.cycles-pp.sock_sendmsg
0.11 ± 7% -0.1 0.06 ± 9% perf-profile.self.cycles-pp.ip_generic_getfrag
0.11 ± 3% -0.1 0.06 perf-profile.self.cycles-pp.switch_mm
0.11 ± 4% -0.1 0.06 ± 9% perf-profile.self.cycles-pp.nf_hook_slow
0.08 ± 6% -0.1 0.03 ±100% perf-profile.self.cycles-pp.alloc_skb_with_frags
0.10 ± 7% -0.1 0.05 perf-profile.self.cycles-pp.__skb_try_recv_from_queue
0.10 -0.1 0.05 perf-profile.self.cycles-pp.dst_release
0.12 ± 10% -0.0 0.07 perf-profile.self.cycles-pp.import_single_range
0.11 ± 6% -0.0 0.06 perf-profile.self.cycles-pp.rb_insert_color_cached
0.10 ± 4% -0.0 0.05 ± 8% perf-profile.self.cycles-pp.sock_alloc_send_pskb
0.12 ± 5% -0.0 0.08 ± 6% perf-profile.self.cycles-pp.cpuidle_enter_state
0.09 ± 4% -0.0 0.05 perf-profile.self.cycles-pp.ip_finish_output
0.10 -0.0 0.06 perf-profile.self.cycles-pp.ip_send_skb
0.09 ± 7% -0.0 0.05 perf-profile.self.cycles-pp.schedule_idle
0.09 ± 4% -0.0 0.05 perf-profile.self.cycles-pp.get_next_timer_interrupt
0.09 ± 4% -0.0 0.06 ± 9% perf-profile.self.cycles-pp.udp_queue_rcv_skb
0.08 ± 6% -0.0 0.04 ± 58% perf-profile.self.cycles-pp.tick_check_broadcast_expired
0.09 ± 5% -0.0 0.05 perf-profile.self.cycles-pp.dev_hard_start_xmit
0.08 ± 5% -0.0 0.05 perf-profile.self.cycles-pp.ip_setup_cork
0.10 ± 7% -0.0 0.07 ± 6% perf-profile.self.cycles-pp.do_softirq_own_stack
0.07 -0.0 0.05 perf-profile.self.cycles-pp.receiver_wake_function
0.00 +0.1 0.10 ± 5% perf-profile.self.cycles-pp.percpu_counter_add_batch
0.00 +0.1 0.10 ± 4% perf-profile.self.cycles-pp.cpumask_next
0.00 +1.0 1.01 perf-profile.self.cycles-pp.__percpu_counter_sum
30.01 +2.7 32.69 perf-profile.self.cycles-pp.intel_idle
0.06 ± 87% +30.2 30.31 perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath



***************************************************************************************************
lkp-bdw-ep2: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 128G memory
=========================================================================================
cluster/compiler/cpufreq_governor/ip/kconfig/nr_threads/rootfs/runtime/tbox_group/test/testcase:
cs-localhost/gcc-7/performance/ipv4/x86_64-rhel-7.2/25%/debian-x86_64-2018-04-03.cgz/300s/lkp-bdw-ep2/UDP_RR/netperf

commit:
6da410d97f (" mlx5e-fixes-2018-09-05")
b99259a614 ("net/sock: move memory_allocated over to percpu_counter variables")

6da410d97ffa486e b99259a61450bb6403bbbbf279
---------------- --------------------------
fail:runs %reproduction fail:runs
| | |
:4 25% 1:4 dmesg.WARNING:at#for_ip_interrupt_entry/0x
9:4 -43% 7:4 perf-profile.calltrace.cycles-pp.dequeue_entity.dequeue_task_fair.__sched_text_start.schedule.schedule_timeout
3:4 -15% 3:4 perf-profile.calltrace.cycles-pp.switch_mm_irqs_off.switch_mm.__sched_text_start.schedule.schedule_timeout
26:4 -109% 21:4 perf-profile.children.cycles-pp.schedule_timeout
1:4 -11% 1:4 perf-profile.self.cycles-pp.schedule_timeout
%stddev %change %stddev
\ | \
1930589 -13.5% 1670226 netperf.Throughput_total_tps
87754 -13.5% 75919 netperf.Throughput_tps
1426 -3.9% 1371 ± 2% netperf.time.percent_of_cpu_this_job_got
3911 -2.6% 3809 ± 2% netperf.time.system_time
385.12 -17.0% 319.68 ± 4% netperf.time.user_time
5.792e+08 -13.5% 5.011e+08 netperf.time.voluntary_context_switches
5.792e+08 -13.5% 5.011e+08 netperf.workload
5.792e+08 -12.5% 5.068e+08 softirqs.NET_RX
7590488 -13.1% 6595363 vmstat.system.cs
2095 ± 50% +740.9% 17617 ± 87% numa-meminfo.node0.Shmem
40809 ± 4% -14.3% 34971 ± 11% numa-meminfo.node1.SReclaimable
415.39 -1.8% 408.09 pmeter.Average_Active_Power
211.25 -11.9% 186.02 pmeter.performance_per_watt
18997 ± 6% +8.2% 20555 ± 3% sched_debug.cpu.load.avg
13008831 -13.3% 11273636 sched_debug.cpu.nr_switches.avg
1.133e+09 -12.8% 9.877e+08 cpuidle.C1.usage
8.879e+09 ± 18% -43.5% 5.013e+09 ± 54% cpuidle.C6.time
25831364 ± 3% -27.4% 18752863 cpuidle.POLL.usage
7.97 +2.5 10.43 mpstat.cpu.soft%
17.09 -2.4 14.66 mpstat.cpu.sys%
2.34 -0.3 2.00 ± 2% mpstat.cpu.usr%
1037 -3.8% 998.50 turbostat.Avg_MHz
1.133e+09 -12.8% 9.877e+08 turbostat.C1
33.21 ± 18% -14.4 18.80 ± 55% turbostat.C6%
0.96 ± 96% +201.3% 2.88 ± 30% turbostat.CPU%c3
216.23 -2.2% 211.46 turbostat.PkgWatt
523.00 ± 50% +742.2% 4404 ± 87% numa-vmstat.node0.nr_shmem
471766 ± 12% +27.2% 600222 ± 6% numa-vmstat.node0.numa_hit
454541 ± 12% +28.6% 584644 ± 7% numa-vmstat.node0.numa_local
10202 ± 4% -14.3% 8743 ± 11% numa-vmstat.node1.nr_slab_reclaimable
665328 ± 9% -20.2% 530868 ± 7% numa-vmstat.node1.numa_hit
532036 ± 11% -25.5% 396167 ± 9% numa-vmstat.node1.numa_local
3.597e+12 -6.9% 3.348e+12 perf-stat.branch-instructions
2.70 -0.2 2.54 perf-stat.branch-miss-rate%
9.714e+10 -12.4% 8.509e+10 perf-stat.branch-misses
3.173e+11 ± 2% -14.9% 2.701e+11 perf-stat.cache-references
2.317e+09 -13.3% 2.01e+09 perf-stat.context-switches
1.59 +3.4% 1.64 perf-stat.cpi
2.871e+13 -4.5% 2.742e+13 perf-stat.cpu-cycles
0.09 -0.0 0.09 perf-stat.dTLB-load-miss-rate%
4.816e+09 -10.7% 4.302e+09 perf-stat.dTLB-load-misses
5.29e+12 -8.4% 4.846e+12 perf-stat.dTLB-loads
0.00 ± 27% +0.0 0.00 ± 9% perf-stat.dTLB-store-miss-rate%
50665696 ± 27% +87.6% 95071493 ± 9% perf-stat.dTLB-store-misses
3.418e+12 -11.6% 3.023e+12 perf-stat.dTLB-stores
11.55 ± 5% +1.9 13.47 ± 7% perf-stat.iTLB-load-miss-rate%
4.068e+10 -16.8% 3.384e+10 ± 2% perf-stat.iTLB-loads
1.81e+13 -7.6% 1.673e+13 perf-stat.instructions
0.63 -3.3% 0.61 perf-stat.ipc
7.63e+08 ± 20% +87.8% 1.433e+09 ± 3% perf-stat.node-load-misses
86.73 ± 2% -27.7 59.01 perf-stat.node-store-miss-rate%
5.359e+08 ± 9% +17.5% 6.296e+08 perf-stat.node-store-misses
83392223 ± 25% +424.4% 4.373e+08 perf-stat.node-stores
31255 +6.8% 33385 perf-stat.path-length
13.66 ± 2% -2.4 11.21 ± 6% perf-profile.calltrace.cycles-pp.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
13.45 ± 2% -2.4 11.05 ± 6% perf-profile.calltrace.cycles-pp.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
11.88 ± 2% -2.1 9.73 ± 6% perf-profile.calltrace.cycles-pp.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
11.67 ± 2% -2.1 9.58 ± 6% perf-profile.calltrace.cycles-pp.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64
9.09 ± 2% -1.6 7.45 ± 7% perf-profile.calltrace.cycles-pp.__skb_recv_udp.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
7.67 ± 2% -1.4 6.27 ± 7% perf-profile.calltrace.cycles-pp.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg.inet_recvmsg.__sys_recvfrom
9.02 ± 2% -1.3 7.69 ± 6% perf-profile.calltrace.cycles-pp.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv
9.25 ± 2% -1.3 7.99 ± 6% perf-profile.calltrace.cycles-pp.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish
8.48 ± 2% -1.2 7.24 ± 7% perf-profile.calltrace.cycles-pp.__wake_up_common.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb.udp_queue_rcv_skb
8.14 ± 3% -1.1 6.99 ± 7% perf-profile.calltrace.cycles-pp.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable.__udp_enqueue_schedule_skb
8.03 ± 3% -1.1 6.89 ± 7% perf-profile.calltrace.cycles-pp.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock.sock_def_readable
6.50 ± 2% -1.1 5.41 ± 7% perf-profile.calltrace.cycles-pp.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg.inet_recvmsg
6.11 ± 2% -1.0 5.12 ± 7% perf-profile.calltrace.cycles-pp.schedule.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp.udp_recvmsg
5.97 ± 2% -1.0 5.02 ± 7% perf-profile.calltrace.cycles-pp.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets.__skb_recv_udp
5.49 -0.9 4.60 ± 5% perf-profile.calltrace.cycles-pp.schedule_idle.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
5.35 -0.9 4.48 ± 5% perf-profile.calltrace.cycles-pp.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry.start_secondary
4.79 -0.8 3.96 ± 6% perf-profile.calltrace.cycles-pp.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
4.50 ± 2% -0.7 3.85 ± 7% perf-profile.calltrace.cycles-pp.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
4.24 ± 2% -0.6 3.62 ± 8% perf-profile.calltrace.cycles-pp.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function.__wake_up_common
3.50 ± 3% -0.5 2.99 ± 8% perf-profile.calltrace.cycles-pp.enqueue_entity.enqueue_task_fair.ttwu_do_activate.try_to_wake_up.autoremove_wake_function
2.62 ± 2% -0.5 2.16 ± 7% perf-profile.calltrace.cycles-pp.dequeue_task_fair.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets
2.88 -0.4 2.46 ± 6% perf-profile.calltrace.cycles-pp.__ip_append_data.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
1.68 -0.4 1.29 ± 4% perf-profile.calltrace.cycles-pp.__ip_make_skb.ip_make_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
2.78 ± 3% -0.4 2.42 ± 7% perf-profile.calltrace.cycles-pp.menu_select.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
1.84 -0.4 1.48 ± 6% perf-profile.calltrace.cycles-pp.pick_next_task_fair.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry
0.64 ± 3% -0.4 0.28 ±100% perf-profile.calltrace.cycles-pp.__next_timer_interrupt.get_next_timer_interrupt.tick_nohz_next_event.tick_nohz_get_sleep_length.menu_select
1.89 ± 3% -0.3 1.56 ± 7% perf-profile.calltrace.cycles-pp.__entry_SYSCALL_64_trampoline
2.04 ± 4% -0.3 1.73 ± 8% perf-profile.calltrace.cycles-pp.__dev_queue_xmit.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb
1.29 ± 2% -0.3 1.00 ± 4% perf-profile.calltrace.cycles-pp.__ip_select_ident.__ip_make_skb.ip_make_skb.udp_sendmsg.sock_sendmsg
1.22 ± 2% -0.3 0.94 ± 4% perf-profile.calltrace.cycles-pp.ip_idents_reserve.__ip_select_ident.__ip_make_skb.ip_make_skb.udp_sendmsg
1.32 -0.3 1.04 ± 6% perf-profile.calltrace.cycles-pp.set_next_entity.pick_next_task_fair.__sched_text_start.schedule_idle.do_idle
2.11 ± 2% -0.3 1.83 ± 6% perf-profile.calltrace.cycles-pp.sock_alloc_send_pskb.__ip_append_data.ip_make_skb.udp_sendmsg.sock_sendmsg
1.77 ± 3% -0.3 1.50 ± 8% perf-profile.calltrace.cycles-pp.tick_nohz_get_sleep_length.menu_select.do_idle.cpu_startup_entry.start_secondary
1.49 -0.3 1.22 ± 5% perf-profile.calltrace.cycles-pp.syscall_return_via_sysret
1.58 ± 4% -0.2 1.36 ± 7% perf-profile.calltrace.cycles-pp.dev_hard_start_xmit.__dev_queue_xmit.ip_finish_output2.ip_output.ip_send_skb
1.60 -0.2 1.40 ± 4% perf-profile.calltrace.cycles-pp.switch_mm_irqs_off.__sched_text_start.schedule_idle.do_idle.cpu_startup_entry
1.50 ± 4% -0.2 1.30 ± 7% perf-profile.calltrace.cycles-pp.loopback_xmit.dev_hard_start_xmit.__dev_queue_xmit.ip_finish_output2.ip_output
0.60 ± 2% -0.2 0.41 ± 58% perf-profile.calltrace.cycles-pp.selinux_socket_sock_rcv_skb.security_sock_rcv_skb.sk_filter_trim_cap.udp_queue_rcv_skb.__udp4_lib_rcv
1.48 ± 2% -0.2 1.29 ± 7% perf-profile.calltrace.cycles-pp.alloc_skb_with_frags.sock_alloc_send_pskb.__ip_append_data.ip_make_skb.udp_sendmsg
1.41 ± 2% -0.2 1.23 ± 7% perf-profile.calltrace.cycles-pp.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb.__ip_append_data.ip_make_skb
0.90 -0.2 0.73 ± 4% perf-profile.calltrace.cycles-pp._copy_to_iter.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
1.09 ± 3% -0.2 0.93 ± 8% perf-profile.calltrace.cycles-pp.get_next_timer_interrupt.tick_nohz_next_event.tick_nohz_get_sleep_length.menu_select.do_idle
0.81 ± 3% -0.2 0.66 ± 4% perf-profile.calltrace.cycles-pp.sock_wfree.loopback_xmit.dev_hard_start_xmit.__dev_queue_xmit.ip_finish_output2
1.02 ± 3% -0.2 0.87 ± 7% perf-profile.calltrace.cycles-pp.switch_mm.__sched_text_start.schedule.schedule_timeout.__skb_wait_for_more_packets
0.94 -0.2 0.79 ± 6% perf-profile.calltrace.cycles-pp.move_addr_to_user.__sys_recvfrom.__x64_sys_recvfrom.do_syscall_64.entry_SYSCALL_64_after_hwframe
0.72 -0.2 0.57 ± 4% perf-profile.calltrace.cycles-pp.__switch_to_asm
0.90 ± 3% -0.2 0.75 ± 5% perf-profile.calltrace.cycles-pp.__consume_stateless_skb.udp_recvmsg.inet_recvmsg.__sys_recvfrom.__x64_sys_recvfrom
0.70 ± 3% -0.2 0.54 ± 4% perf-profile.calltrace.cycles-pp.tick_nohz_idle_exit.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64
0.71 ± 2% -0.1 0.58 ± 9% perf-profile.calltrace.cycles-pp.nf_hook_slow.ip_output.ip_send_skb.udp_send_skb.udp_sendmsg
0.94 ± 3% -0.1 0.82 ± 8% perf-profile.calltrace.cycles-pp.select_task_rq_fair.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.99 ± 4% -0.1 0.88 ± 5% perf-profile.calltrace.cycles-pp.ip_route_output_flow.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
1.07 ± 5% -0.1 0.96 ± 5% perf-profile.calltrace.cycles-pp.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function.__wake_up_common.__wake_up_common_lock
0.73 ± 3% -0.1 0.62 ± 4% perf-profile.calltrace.cycles-pp.poll_idle.cpuidle_enter_state.do_idle.cpu_startup_entry.start_secondary
1.02 ± 5% -0.1 0.92 ± 5% perf-profile.calltrace.cycles-pp.check_preempt_curr.ttwu_do_wakeup.try_to_wake_up.autoremove_wake_function.__wake_up_common
0.68 ± 2% -0.1 0.59 ± 7% perf-profile.calltrace.cycles-pp.sk_filter_trim_cap.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver
0.69 ± 4% -0.1 0.60 ± 8% perf-profile.calltrace.cycles-pp._raw_spin_lock_bh.__skb_recv_udp.udp_recvmsg.inet_recvmsg.__sys_recvfrom
0.64 ± 2% -0.1 0.55 ± 6% perf-profile.calltrace.cycles-pp.security_sock_rcv_skb.sk_filter_trim_cap.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish
26.69 ± 2% +1.9 28.61 ± 5% perf-profile.calltrace.cycles-pp.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
26.51 ± 2% +1.9 28.45 ± 5% perf-profile.calltrace.cycles-pp.__sys_sendto.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
25.75 ± 2% +2.1 27.80 ± 5% perf-profile.calltrace.cycles-pp.sock_sendmsg.__sys_sendto.__x64_sys_sendto.do_syscall_64.entry_SYSCALL_64_after_hwframe
25.20 ± 2% +2.2 27.38 ± 5% perf-profile.calltrace.cycles-pp.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto.do_syscall_64
18.57 ± 2% +3.3 21.89 ± 5% perf-profile.calltrace.cycles-pp.udp_send_skb.udp_sendmsg.sock_sendmsg.__sys_sendto.__x64_sys_sendto
18.32 ± 2% +3.4 21.68 ± 5% perf-profile.calltrace.cycles-pp.ip_send_skb.udp_send_skb.udp_sendmsg.sock_sendmsg.__sys_sendto
17.95 ± 2% +3.4 21.32 ± 5% perf-profile.calltrace.cycles-pp.ip_output.ip_send_skb.udp_send_skb.udp_sendmsg.sock_sendmsg
16.70 ± 2% +3.6 20.34 ± 5% perf-profile.calltrace.cycles-pp.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb.udp_sendmsg
14.36 ± 2% +4.0 18.32 ± 5% perf-profile.calltrace.cycles-pp.__local_bh_enable_ip.ip_finish_output2.ip_output.ip_send_skb.udp_send_skb
14.21 ± 2% +4.0 18.22 ± 5% perf-profile.calltrace.cycles-pp.do_softirq.__local_bh_enable_ip.ip_finish_output2.ip_output.ip_send_skb
13.99 ± 2% +4.0 18.01 ± 5% perf-profile.calltrace.cycles-pp.__softirqentry_text_start.do_softirq_own_stack.do_softirq.__local_bh_enable_ip.ip_finish_output2
14.04 ± 2% +4.0 18.07 ± 5% perf-profile.calltrace.cycles-pp.do_softirq_own_stack.do_softirq.__local_bh_enable_ip.ip_finish_output2.ip_output
13.60 ± 2% +4.1 17.66 ± 5% perf-profile.calltrace.cycles-pp.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq.__local_bh_enable_ip
13.30 ± 2% +4.1 17.42 ± 5% perf-profile.calltrace.cycles-pp.process_backlog.net_rx_action.__softirqentry_text_start.do_softirq_own_stack.do_softirq
0.00 +4.1 4.14 ± 5% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb
12.53 ± 2% +4.2 16.77 ± 5% perf-profile.calltrace.cycles-pp.__netif_receive_skb_one_core.process_backlog.net_rx_action.__softirqentry_text_start.do_softirq_own_stack
12.28 ± 2% +4.2 16.52 ± 5% perf-profile.calltrace.cycles-pp.ip_rcv.__netif_receive_skb_one_core.process_backlog.net_rx_action.__softirqentry_text_start
11.88 ± 3% +4.3 16.14 ± 5% perf-profile.calltrace.cycles-pp.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core.process_backlog.net_rx_action
11.73 ± 3% +4.3 16.00 ± 5% perf-profile.calltrace.cycles-pp.ip_local_deliver_finish.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core.process_backlog
11.40 ± 2% +4.4 15.77 ± 6% perf-profile.calltrace.cycles-pp.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver.ip_rcv.__netif_receive_skb_one_core
0.00 +4.4 4.44 ± 5% perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb
10.89 ± 2% +4.4 15.34 ± 6% perf-profile.calltrace.cycles-pp.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver.ip_rcv
9.96 ± 2% +4.6 14.57 ± 6% perf-profile.calltrace.cycles-pp.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish.ip_local_deliver
0.00 +5.8 5.80 ± 5% perf-profile.calltrace.cycles-pp.__percpu_counter_sum.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv
0.00 +6.0 6.01 ± 5% perf-profile.calltrace.cycles-pp.__sk_mem_raise_allocated.__udp_enqueue_schedule_skb.udp_queue_rcv_skb.__udp4_lib_rcv.ip_local_deliver_finish
13.67 ± 2% -2.5 11.22 ± 6% perf-profile.children.cycles-pp.__x64_sys_recvfrom
13.46 ± 2% -2.4 11.06 ± 6% perf-profile.children.cycles-pp.__sys_recvfrom
11.88 ± 2% -2.1 9.73 ± 6% perf-profile.children.cycles-pp.inet_recvmsg
11.69 ± 2% -2.1 9.59 ± 6% perf-profile.children.cycles-pp.udp_recvmsg
11.45 -1.8 9.62 ± 6% perf-profile.children.cycles-pp.__sched_text_start
9.11 ± 2% -1.6 7.47 ± 7% perf-profile.children.cycles-pp.__skb_recv_udp
7.67 ± 2% -1.4 6.28 ± 7% perf-profile.children.cycles-pp.__skb_wait_for_more_packets
9.03 ± 2% -1.3 7.70 ± 6% perf-profile.children.cycles-pp.__wake_up_common_lock
9.25 ± 3% -1.3 7.99 ± 6% perf-profile.children.cycles-pp.sock_def_readable
8.48 ± 2% -1.2 7.24 ± 7% perf-profile.children.cycles-pp.__wake_up_common
8.06 ± 3% -1.2 6.90 ± 7% perf-profile.children.cycles-pp.try_to_wake_up
8.15 ± 3% -1.1 7.00 ± 7% perf-profile.children.cycles-pp.autoremove_wake_function
6.11 ± 2% -1.0 5.12 ± 7% perf-profile.children.cycles-pp.schedule
5.51 -0.9 4.62 ± 5% perf-profile.children.cycles-pp.schedule_idle
4.80 -0.8 3.96 ± 6% perf-profile.children.cycles-pp.ip_make_skb
4.50 ± 2% -0.6 3.86 ± 7% perf-profile.children.cycles-pp.ttwu_do_activate
4.25 ± 2% -0.6 3.63 ± 8% perf-profile.children.cycles-pp.enqueue_task_fair
3.66 ± 3% -0.5 3.14 ± 8% perf-profile.children.cycles-pp.enqueue_entity
2.65 ± 2% -0.5 2.18 ± 7% perf-profile.children.cycles-pp.dequeue_task_fair
2.32 ± 2% -0.4 1.88 ± 7% perf-profile.children.cycles-pp.pick_next_task_fair
2.38 ± 2% -0.4 1.95 ± 8% perf-profile.children.cycles-pp.dequeue_entity
2.90 -0.4 2.47 ± 6% perf-profile.children.cycles-pp.__ip_append_data
2.31 ± 3% -0.4 1.91 ± 6% perf-profile.children.cycles-pp.__entry_SYSCALL_64_trampoline
1.68 -0.4 1.29 ± 4% perf-profile.children.cycles-pp.__ip_make_skb
2.81 ± 3% -0.4 2.44 ± 7% perf-profile.children.cycles-pp.menu_select
2.60 -0.4 2.24 ± 5% perf-profile.children.cycles-pp.switch_mm_irqs_off
2.42 ± 3% -0.3 2.11 ± 8% perf-profile.children.cycles-pp.update_load_avg
2.04 ± 4% -0.3 1.74 ± 7% perf-profile.children.cycles-pp.__dev_queue_xmit
1.69 ± 2% -0.3 1.40 ± 5% perf-profile.children.cycles-pp.syscall_return_via_sysret
1.29 ± 2% -0.3 1.00 ± 4% perf-profile.children.cycles-pp.__ip_select_ident
1.23 ± 2% -0.3 0.94 ± 4% perf-profile.children.cycles-pp.ip_idents_reserve
1.34 -0.3 1.06 ± 6% perf-profile.children.cycles-pp.set_next_entity
2.12 ± 2% -0.3 1.84 ± 6% perf-profile.children.cycles-pp.sock_alloc_send_pskb
1.79 ± 3% -0.3 1.52 ± 8% perf-profile.children.cycles-pp.tick_nohz_get_sleep_length
1.48 ± 2% -0.3 1.23 ± 3% perf-profile.children.cycles-pp._raw_spin_lock
1.24 ± 2% -0.2 1.03 ± 6% perf-profile.children.cycles-pp.load_new_mm_cr3
1.58 ± 4% -0.2 1.37 ± 7% perf-profile.children.cycles-pp.dev_hard_start_xmit
1.51 ± 4% -0.2 1.31 ± 7% perf-profile.children.cycles-pp.loopback_xmit
1.48 ± 2% -0.2 1.29 ± 7% perf-profile.children.cycles-pp.alloc_skb_with_frags
0.83 ± 3% -0.2 0.66 ± 8% perf-profile.children.cycles-pp.update_curr
1.42 ± 2% -0.2 1.24 ± 7% perf-profile.children.cycles-pp.__alloc_skb
0.73 ± 3% -0.2 0.55 ± 4% perf-profile.children.cycles-pp.tick_nohz_idle_exit
0.91 -0.2 0.73 ± 4% perf-profile.children.cycles-pp._copy_to_iter
1.07 ± 2% -0.2 0.90 ± 6% perf-profile.children.cycles-pp.switch_mm
1.10 ± 3% -0.2 0.94 ± 8% perf-profile.children.cycles-pp.get_next_timer_interrupt
0.81 ± 3% -0.2 0.66 ± 4% perf-profile.children.cycles-pp.sock_wfree
0.95 -0.2 0.80 ± 6% perf-profile.children.cycles-pp.move_addr_to_user
0.90 ± 2% -0.2 0.75 ± 5% perf-profile.children.cycles-pp.__consume_stateless_skb
0.74 -0.2 0.59 ± 4% perf-profile.children.cycles-pp.__switch_to_asm
0.78 -0.1 0.64 ± 6% perf-profile.children.cycles-pp.copy_user_generic_unrolled
0.49 -0.1 0.35 ± 5% perf-profile.children.cycles-pp.update_ts_time_stats
0.66 ± 2% -0.1 0.53 ± 5% perf-profile.children.cycles-pp.__switch_to
0.84 ± 3% -0.1 0.70 ± 8% perf-profile.children.cycles-pp.nf_hook_slow
0.71 ± 3% -0.1 0.58 ± 9% perf-profile.children.cycles-pp.__next_timer_interrupt
0.95 ± 3% -0.1 0.82 ± 7% perf-profile.children.cycles-pp.select_task_rq_fair
0.68 ± 3% -0.1 0.55 ± 12% perf-profile.children.cycles-pp.update_cfs_group
0.57 ± 4% -0.1 0.44 ± 7% perf-profile.children.cycles-pp.prepare_to_wait_exclusive
0.43 ± 7% -0.1 0.31 ± 10% perf-profile.children.cycles-pp.__enqueue_entity
0.69 ± 3% -0.1 0.56 ± 5% perf-profile.children.cycles-pp.__slab_free
0.43 -0.1 0.31 ± 7% perf-profile.children.cycles-pp.ip_generic_getfrag
0.74 ± 3% -0.1 0.63 ± 4% perf-profile.children.cycles-pp.poll_idle
0.39 ± 2% -0.1 0.28 ± 7% perf-profile.children.cycles-pp._copy_from_iter_full
1.07 ± 5% -0.1 0.97 ± 5% perf-profile.children.cycles-pp.ttwu_do_wakeup
1.04 ± 5% -0.1 0.94 ± 5% perf-profile.children.cycles-pp.check_preempt_curr
0.54 -0.1 0.44 ± 7% perf-profile.children.cycles-pp.update_rq_clock
0.57 ± 2% -0.1 0.47 ± 9% perf-profile.children.cycles-pp.selinux_ip_postroute
0.53 ± 2% -0.1 0.43 ± 4% perf-profile.children.cycles-pp.sock_def_write_space
0.68 ± 2% -0.1 0.59 ± 7% perf-profile.children.cycles-pp.sk_filter_trim_cap
0.57 -0.1 0.48 ± 5% perf-profile.children.cycles-pp.sched_clock_cpu
0.53 -0.1 0.44 ± 4% perf-profile.children.cycles-pp.copyout
0.64 ± 2% -0.1 0.55 ± 6% perf-profile.children.cycles-pp.security_sock_rcv_skb
0.56 -0.1 0.47 ± 4% perf-profile.children.cycles-pp.skb_set_owner_w
0.69 ± 5% -0.1 0.60 ± 8% perf-profile.children.cycles-pp._raw_spin_lock_bh
0.53 ± 2% -0.1 0.44 ± 4% perf-profile.children.cycles-pp.sched_clock
0.39 ± 5% -0.1 0.30 ± 8% perf-profile.children.cycles-pp.__update_load_avg_se
0.35 ± 4% -0.1 0.26 ± 6% perf-profile.children.cycles-pp.hrtimer_next_event_without
0.42 -0.1 0.34 ± 5% perf-profile.children.cycles-pp.tick_nohz_idle_enter
0.61 ± 2% -0.1 0.53 ± 6% perf-profile.children.cycles-pp.selinux_socket_sock_rcv_skb
0.38 ± 6% -0.1 0.30 ± 14% perf-profile.children.cycles-pp.___perf_sw_event
0.50 ± 2% -0.1 0.42 ± 5% perf-profile.children.cycles-pp.native_sched_clock
0.40 ± 2% -0.1 0.33 ± 4% perf-profile.children.cycles-pp.nr_iowait_cpu
0.41 ± 3% -0.1 0.34 ± 8% perf-profile.children.cycles-pp.sockfd_lookup_light
0.45 ± 3% -0.1 0.37 ± 11% perf-profile.children.cycles-pp.selinux_ip_postroute_compat
0.45 -0.1 0.37 ± 8% perf-profile.children.cycles-pp.native_write_msr
0.56 ± 4% -0.1 0.48 ± 6% perf-profile.children.cycles-pp.finish_task_switch
0.43 -0.1 0.36 ± 9% perf-profile.children.cycles-pp.ksize
0.30 ± 6% -0.1 0.23 ± 5% perf-profile.children.cycles-pp.__udp4_lib_lookup
0.33 ± 3% -0.1 0.27 ± 8% perf-profile.children.cycles-pp.__fget_light
0.31 -0.1 0.25 ± 11% perf-profile.children.cycles-pp.kmem_cache_alloc_node
0.20 -0.1 0.14 ± 3% perf-profile.children.cycles-pp.copyin
0.35 -0.1 0.28 ± 8% perf-profile.children.cycles-pp.__might_fault
0.28 ± 9% -0.1 0.22 ± 5% perf-profile.children.cycles-pp.pick_next_task_idle
0.21 ± 5% -0.1 0.15 ± 2% perf-profile.children.cycles-pp.entry_SYSCALL_64_stage2
0.49 -0.1 0.43 ± 5% perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
0.27 ± 3% -0.1 0.21 ± 7% perf-profile.children.cycles-pp.validate_xmit_skb
0.09 ± 8% -0.1 0.04 ± 57% perf-profile.children.cycles-pp.kfree
0.24 ± 5% -0.1 0.18 ± 6% perf-profile.children.cycles-pp.rb_erase_cached
0.44 ± 2% -0.1 0.39 ± 5% perf-profile.children.cycles-pp.reweight_entity
0.26 ± 3% -0.1 0.21 ± 7% perf-profile.children.cycles-pp.enqueue_to_backlog
0.24 -0.0 0.20 ± 5% perf-profile.children.cycles-pp.security_socket_sendmsg
0.21 ± 5% -0.0 0.16 ± 6% perf-profile.children.cycles-pp.kmem_cache_free
0.15 ± 7% -0.0 0.10 ± 10% perf-profile.children.cycles-pp.__hrtimer_next_event_base
0.07 ± 12% -0.0 0.03 ±100% perf-profile.children.cycles-pp.rb_next
0.24 -0.0 0.20 ± 9% perf-profile.children.cycles-pp.___might_sleep
0.18 ± 12% -0.0 0.14 perf-profile.children.cycles-pp.inet_sendmsg
0.20 ± 4% -0.0 0.16 ± 6% perf-profile.children.cycles-pp.rcu_idle_exit
0.19 ± 2% -0.0 0.15 ± 2% perf-profile.children.cycles-pp.copy_user_enhanced_fast_string
0.17 ± 7% -0.0 0.13 ± 11% perf-profile.children.cycles-pp.put_prev_task_fair
0.14 ± 10% -0.0 0.10 ± 10% perf-profile.children.cycles-pp.compute_score
0.22 ± 4% -0.0 0.19 ± 10% perf-profile.children.cycles-pp.selinux_parse_skb
0.29 ± 4% -0.0 0.25 ± 9% perf-profile.children.cycles-pp.ipv4_mtu
0.16 ± 4% -0.0 0.12 ± 6% perf-profile.children.cycles-pp.__calc_delta
0.24 -0.0 0.21 ± 8% perf-profile.children.cycles-pp.move_addr_to_kernel
0.22 ± 4% -0.0 0.19 ± 3% perf-profile.children.cycles-pp.sock_has_perm
0.23 ± 3% -0.0 0.19 ± 9% perf-profile.children.cycles-pp.__list_del_entry_valid
0.26 ± 3% -0.0 0.22 ± 4% perf-profile.children.cycles-pp.pick_next_entity
0.21 ± 3% -0.0 0.18 ± 7% perf-profile.children.cycles-pp._copy_from_user
0.19 ± 6% -0.0 0.16 ± 6% perf-profile.children.cycles-pp.__skb_try_recv_from_queue
0.20 ± 2% -0.0 0.17 ± 5% perf-profile.children.cycles-pp.rcu_eqs_enter
0.18 ± 2% -0.0 0.14 ± 5% perf-profile.children.cycles-pp.security_sk_classify_flow
0.15 ± 7% -0.0 0.11 ± 7% perf-profile.children.cycles-pp.menu_reflect
0.10 ± 5% -0.0 0.07 ± 7% perf-profile.children.cycles-pp.selinux_socket_sendmsg
0.19 ± 2% -0.0 0.16 ± 7% perf-profile.children.cycles-pp._copy_to_user
0.19 ± 4% -0.0 0.16 ± 8% perf-profile.children.cycles-pp.cpuidle_governor_latency_req
0.17 ± 4% -0.0 0.15 ± 11% perf-profile.children.cycles-pp.__might_sleep
0.17 ± 6% -0.0 0.14 perf-profile.children.cycles-pp.sock_recvmsg
0.10 ± 7% -0.0 0.08 ± 6% perf-profile.children.cycles-pp.ipv4_pktinfo_prepare
0.14 ± 7% -0.0 0.12 perf-profile.children.cycles-pp.security_socket_recvmsg
0.13 ± 3% -0.0 0.11 ± 6% perf-profile.children.cycles-pp.__get_user_4
0.13 ± 3% -0.0 0.11 ± 6% perf-profile.children.cycles-pp.selinux_sk_getsecid
0.11 ± 8% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.dst_release
0.10 ± 8% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.selinux_ipv4_postroute
0.11 ± 4% -0.0 0.10 ± 11% perf-profile.children.cycles-pp.pm_qos_request
0.11 ± 4% -0.0 0.09 ± 4% perf-profile.children.cycles-pp.udp_rcv
0.09 ± 4% -0.0 0.07 ± 5% perf-profile.children.cycles-pp.ip_send_check
0.05 +0.0 0.07 ± 13% perf-profile.children.cycles-pp.selinux_netlbl_sock_rcv_skb
0.06 ± 11% +0.0 0.08 ± 6% perf-profile.children.cycles-pp.rebalance_domains
0.07 ± 7% +0.0 0.09 ± 7% perf-profile.children.cycles-pp.ip_rcv_finish_core
0.09 ± 4% +0.0 0.12 ± 3% perf-profile.children.cycles-pp.ip_rcv_finish
0.15 ± 8% +0.0 0.20 ± 8% perf-profile.children.cycles-pp.irq_exit
0.11 ± 31% +0.1 0.16 ± 22% perf-profile.children.cycles-pp.clockevents_program_event
0.00 +0.1 0.06 ± 6% perf-profile.children.cycles-pp.netlbl_enabled
0.45 ± 5% +0.1 0.57 ± 7% perf-profile.children.cycles-pp.find_next_bit
0.00 +0.2 0.17 ± 4% perf-profile.children.cycles-pp.percpu_counter_add_batch
0.00 +0.3 0.34 ± 5% perf-profile.children.cycles-pp.cpumask_next
26.71 ± 2% +1.9 28.63 ± 5% perf-profile.children.cycles-pp.__x64_sys_sendto
26.51 ± 2% +1.9 28.46 ± 5% perf-profile.children.cycles-pp.__sys_sendto
25.75 ± 2% +2.1 27.81 ± 5% perf-profile.children.cycles-pp.sock_sendmsg
25.23 ± 2% +2.2 27.40 ± 5% perf-profile.children.cycles-pp.udp_sendmsg
18.58 ± 2% +3.3 21.90 ± 5% perf-profile.children.cycles-pp.udp_send_skb
18.33 ± 2% +3.4 21.68 ± 5% perf-profile.children.cycles-pp.ip_send_skb
17.95 ± 2% +3.4 21.33 ± 5% perf-profile.children.cycles-pp.ip_output
16.73 ± 2% +3.6 20.36 ± 5% perf-profile.children.cycles-pp.ip_finish_output2
14.40 ± 2% +4.0 18.36 ± 5% perf-profile.children.cycles-pp.__local_bh_enable_ip
14.24 ± 2% +4.0 18.25 ± 5% perf-profile.children.cycles-pp.do_softirq
14.05 ± 2% +4.0 18.08 ± 5% perf-profile.children.cycles-pp.do_softirq_own_stack
14.12 ± 2% +4.0 18.16 ± 5% perf-profile.children.cycles-pp.__softirqentry_text_start
13.61 ± 2% +4.1 17.67 ± 5% perf-profile.children.cycles-pp.net_rx_action
13.31 ± 2% +4.1 17.43 ± 5% perf-profile.children.cycles-pp.process_backlog
1.50 ± 2% +4.2 5.68 ± 5% perf-profile.children.cycles-pp._raw_spin_lock_irqsave
0.00 +4.2 4.19 ± 5% perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
12.28 ± 2% +4.2 16.52 ± 5% perf-profile.children.cycles-pp.ip_rcv
12.53 ± 2% +4.2 16.78 ± 5% perf-profile.children.cycles-pp.__netif_receive_skb_one_core
11.89 ± 3% +4.3 16.15 ± 5% perf-profile.children.cycles-pp.ip_local_deliver
11.73 ± 3% +4.3 16.00 ± 5% perf-profile.children.cycles-pp.ip_local_deliver_finish
11.41 ± 2% +4.4 15.78 ± 6% perf-profile.children.cycles-pp.__udp4_lib_rcv
10.90 ± 2% +4.4 15.35 ± 6% perf-profile.children.cycles-pp.udp_queue_rcv_skb
9.97 ± 2% +4.6 14.58 ± 6% perf-profile.children.cycles-pp.__udp_enqueue_schedule_skb
0.00 +5.8 5.84 ± 5% perf-profile.children.cycles-pp.__percpu_counter_sum
0.15 ± 3% +5.9 6.01 ± 5% perf-profile.children.cycles-pp.__sk_mem_raise_allocated
2.25 ± 3% -0.4 1.85 ± 6% perf-profile.self.cycles-pp.__entry_SYSCALL_64_trampoline
2.20 -0.3 1.89 ± 5% perf-profile.self.cycles-pp.__sched_text_start
1.69 ± 2% -0.3 1.39 ± 5% perf-profile.self.cycles-pp.syscall_return_via_sysret
1.22 ± 2% -0.3 0.93 ± 4% perf-profile.self.cycles-pp.ip_idents_reserve
1.43 ± 2% -0.3 1.16 ± 4% perf-profile.self.cycles-pp._raw_spin_lock
1.24 ± 2% -0.2 1.03 ± 6% perf-profile.self.cycles-pp.load_new_mm_cr3
1.01 -0.2 0.82 ± 7% perf-profile.self.cycles-pp.do_idle
0.45 -0.2 0.29 ± 5% perf-profile.self.cycles-pp.__skb_wait_for_more_packets
1.05 ± 2% -0.2 0.90 ± 6% perf-profile.self.cycles-pp.update_load_avg
0.74 -0.2 0.59 ± 4% perf-profile.self.cycles-pp.__switch_to_asm
0.60 ± 2% -0.1 0.45 ± 6% perf-profile.self.cycles-pp.udp_sendmsg
0.76 -0.1 0.62 ± 5% perf-profile.self.cycles-pp.enqueue_entity
1.35 -0.1 1.21 ± 5% perf-profile.self.cycles-pp.switch_mm_irqs_off
0.63 ± 2% -0.1 0.50 ± 5% perf-profile.self.cycles-pp.__switch_to
0.76 -0.1 0.62 ± 6% perf-profile.self.cycles-pp.copy_user_generic_unrolled
0.67 ± 3% -0.1 0.54 ± 12% perf-profile.self.cycles-pp.update_cfs_group
0.56 ± 3% -0.1 0.44 ± 8% perf-profile.self.cycles-pp.update_curr
0.68 ± 3% -0.1 0.56 ± 5% perf-profile.self.cycles-pp.__slab_free
0.42 ± 7% -0.1 0.30 ± 11% perf-profile.self.cycles-pp.__enqueue_entity
0.66 -0.1 0.54 ± 5% perf-profile.self.cycles-pp.set_next_entity
0.34 ± 2% -0.1 0.23 ± 3% perf-profile.self.cycles-pp.ip_output
0.59 ± 2% -0.1 0.49 ± 4% perf-profile.self.cycles-pp.enqueue_task_fair
0.53 ± 3% -0.1 0.42 ± 9% perf-profile.self.cycles-pp.pick_next_task_fair
0.51 -0.1 0.42 ± 7% perf-profile.self.cycles-pp.do_syscall_64
0.64 -0.1 0.55 ± 4% perf-profile.self.cycles-pp.poll_idle
0.54 -0.1 0.45 ± 5% perf-profile.self.cycles-pp.move_addr_to_user
0.63 ± 2% -0.1 0.54 ± 6% perf-profile.self.cycles-pp.select_task_rq_fair
0.40 ± 4% -0.1 0.31 ± 5% perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
0.56 -0.1 0.47 ± 4% perf-profile.self.cycles-pp.skb_set_owner_w
0.52 ± 2% -0.1 0.43 ± 5% perf-profile.self.cycles-pp.sock_def_write_space
0.20 ± 6% -0.1 0.11 ± 7% perf-profile.self.cycles-pp.check_preempt_curr
0.68 ± 5% -0.1 0.60 ± 8% perf-profile.self.cycles-pp._raw_spin_lock_bh
0.53 ± 5% -0.1 0.45 ± 5% perf-profile.self.cycles-pp.udp_recvmsg
0.38 ± 5% -0.1 0.30 ± 8% perf-profile.self.cycles-pp.__update_load_avg_se
0.31 ± 2% -0.1 0.23 ± 8% perf-profile.self.cycles-pp.__skb_recv_udp
0.51 ± 2% -0.1 0.43 ± 10% perf-profile.self.cycles-pp.dequeue_entity
0.26 ± 4% -0.1 0.18 ± 6% perf-profile.self.cycles-pp.__ip_make_skb
0.48 ± 2% -0.1 0.41 ± 4% perf-profile.self.cycles-pp.native_sched_clock
0.29 ± 4% -0.1 0.22 ± 5% perf-profile.self.cycles-pp._copy_to_iter
0.44 -0.1 0.37 ± 7% perf-profile.self.cycles-pp.native_write_msr
0.44 -0.1 0.37 ± 5% perf-profile.self.cycles-pp._raw_spin_unlock_irqrestore
0.40 -0.1 0.33 ± 4% perf-profile.self.cycles-pp.nr_iowait_cpu
0.25 ± 4% -0.1 0.18 ± 8% perf-profile.self.cycles-pp.__wake_up_common
0.42 -0.1 0.35 ± 9% perf-profile.self.cycles-pp.ksize
0.32 -0.1 0.26 ± 9% perf-profile.self.cycles-pp.__next_timer_interrupt
0.28 ± 3% -0.1 0.21 ± 5% perf-profile.self.cycles-pp.sock_wfree
0.32 ± 2% -0.1 0.26 ± 8% perf-profile.self.cycles-pp.__fget_light
0.31 -0.1 0.25 ± 5% perf-profile.self.cycles-pp.update_rq_clock
0.49 -0.1 0.43 ± 6% perf-profile.self.cycles-pp.__udp_enqueue_schedule_skb
0.21 ± 4% -0.1 0.15 ± 5% perf-profile.self.cycles-pp.__sys_recvfrom
0.29 ± 5% -0.1 0.24 ± 6% perf-profile.self.cycles-pp.try_to_wake_up
0.09 ± 8% -0.1 0.04 ± 57% perf-profile.self.cycles-pp.kfree
0.20 ± 7% -0.1 0.15 ± 2% perf-profile.self.cycles-pp.entry_SYSCALL_64_stage2
0.43 ± 2% -0.0 0.39 ± 6% perf-profile.self.cycles-pp.reweight_entity
0.35 ± 4% -0.0 0.31 ± 6% perf-profile.self.cycles-pp.__softirqentry_text_start
0.17 ± 2% -0.0 0.12 ± 8% perf-profile.self.cycles-pp.__local_bh_enable_ip
0.25 ± 4% -0.0 0.21 ± 7% perf-profile.self.cycles-pp.net_rx_action
0.08 ± 5% -0.0 0.04 ± 57% perf-profile.self.cycles-pp.tick_nohz_idle_exit
0.26 ± 4% -0.0 0.21 ± 7% perf-profile.self.cycles-pp.__sys_sendto
0.20 ± 4% -0.0 0.16 ± 5% perf-profile.self.cycles-pp.__x64_sys_recvfrom
0.12 ± 4% -0.0 0.08 ± 8% perf-profile.self.cycles-pp.udp_queue_rcv_skb
0.27 -0.0 0.22 ± 7% perf-profile.self.cycles-pp.loopback_xmit
0.18 ± 7% -0.0 0.14 ± 5% perf-profile.self.cycles-pp.inet_recvmsg
0.14 ± 10% -0.0 0.10 ± 8% perf-profile.self.cycles-pp.compute_score
0.19 ± 6% -0.0 0.15 ± 8% perf-profile.self.cycles-pp.selinux_socket_sock_rcv_skb
0.15 ± 13% -0.0 0.11 ± 7% perf-profile.self.cycles-pp.ip_local_deliver_finish
0.20 ± 4% -0.0 0.16 ± 7% perf-profile.self.cycles-pp.rb_erase_cached
0.22 ± 5% -0.0 0.18 ± 8% perf-profile.self.cycles-pp.__alloc_skb
0.18 ± 2% -0.0 0.14 ± 9% perf-profile.self.cycles-pp.kmem_cache_alloc_node
0.20 ± 2% -0.0 0.16 ± 8% perf-profile.self.cycles-pp.rcu_eqs_enter
0.18 ± 3% -0.0 0.14 ± 3% perf-profile.self.cycles-pp.copy_user_enhanced_fast_string
0.17 ± 15% -0.0 0.13 perf-profile.self.cycles-pp.inet_sendmsg
0.29 ± 2% -0.0 0.26 ± 4% perf-profile.self.cycles-pp.process_backlog
0.23 ± 3% -0.0 0.19 ± 2% perf-profile.self.cycles-pp.pick_next_entity
0.15 ± 5% -0.0 0.11 ± 9% perf-profile.self.cycles-pp.selinux_ip_postroute_compat
0.16 ± 6% -0.0 0.12 ± 6% perf-profile.self.cycles-pp.__calc_delta
0.11 ± 3% -0.0 0.08 ± 8% perf-profile.self.cycles-pp.validate_xmit_skb
0.09 ± 5% -0.0 0.05 ± 9% perf-profile.self.cycles-pp._copy_from_iter_full
0.09 ± 5% -0.0 0.05 ± 9% perf-profile.self.cycles-pp.selinux_socket_sendmsg
0.22 ± 5% -0.0 0.19 ± 3% perf-profile.self.cycles-pp.sock_has_perm
0.15 ± 5% -0.0 0.12 ± 4% perf-profile.self.cycles-pp.__udp4_lib_lookup
0.15 ± 4% -0.0 0.12 ± 10% perf-profile.self.cycles-pp.__might_sleep
0.09 ± 4% -0.0 0.06 ± 6% perf-profile.self.cycles-pp.nf_hook_slow
0.13 ± 6% -0.0 0.10 ± 8% perf-profile.self.cycles-pp.prepare_to_wait_exclusive
0.15 ± 2% -0.0 0.12 ± 5% perf-profile.self.cycles-pp.enqueue_to_backlog
0.08 ± 5% -0.0 0.06 ± 15% perf-profile.self.cycles-pp.ip_make_skb
0.14 ± 6% -0.0 0.11 ± 3% perf-profile.self.cycles-pp.ip_rcv
0.13 -0.0 0.11 ± 4% perf-profile.self.cycles-pp.selinux_ip_postroute
0.10 -0.0 0.08 ± 6% perf-profile.self.cycles-pp.sock_sendmsg
0.10 -0.0 0.08 ± 10% perf-profile.self.cycles-pp.__might_fault
0.09 ± 9% -0.0 0.07 ± 13% perf-profile.self.cycles-pp.put_prev_task_fair
0.12 ± 3% -0.0 0.10 ± 5% perf-profile.self.cycles-pp.rcu_idle_exit
0.12 ± 8% -0.0 0.10 ± 7% perf-profile.self.cycles-pp.menu_reflect
0.06 -0.0 0.04 ± 57% perf-profile.self.cycles-pp.__ip_local_out
0.13 ± 3% -0.0 0.11 ± 7% perf-profile.self.cycles-pp.selinux_sock_rcv_skb_compat
0.08 ± 5% -0.0 0.06 ± 14% perf-profile.self.cycles-pp.__skb_try_recv_from_queue
0.09 ± 4% -0.0 0.07 ± 5% perf-profile.self.cycles-pp.selinux_ipv4_postroute
0.10 ± 9% -0.0 0.08 ± 6% perf-profile.self.cycles-pp.dst_release
0.08 ± 5% -0.0 0.07 ± 7% perf-profile.self.cycles-pp.schedule
0.13 ± 6% -0.0 0.11 ± 6% perf-profile.self.cycles-pp.__get_user_4
0.11 ± 7% -0.0 0.09 ± 7% perf-profile.self.cycles-pp.netif_skb_features
0.11 ± 7% -0.0 0.09 ± 7% perf-profile.self.cycles-pp.__hrtimer_next_event_base
0.18 ± 4% -0.0 0.17 ± 5% perf-profile.self.cycles-pp.__x64_sys_sendto
0.11 ± 3% -0.0 0.10 ± 5% perf-profile.self.cycles-pp.switch_mm
0.12 ± 3% -0.0 0.10 ± 7% perf-profile.self.cycles-pp.selinux_sk_getsecid
0.09 ± 4% -0.0 0.08 ± 6% perf-profile.self.cycles-pp.udp_rcv
0.07 -0.0 0.05 ± 9% perf-profile.self.cycles-pp.update_min_vruntime
0.09 ± 5% -0.0 0.07 perf-profile.self.cycles-pp.ip_send_check
0.08 ± 10% -0.0 0.06 ± 6% perf-profile.self.cycles-pp.sockfd_lookup_light
0.06 ± 11% +0.0 0.09 ± 7% perf-profile.self.cycles-pp.ip_rcv_finish_core
0.00 +0.1 0.06 ± 9% perf-profile.self.cycles-pp.netlbl_enabled
0.00 +0.1 0.06 ± 15% perf-profile.self.cycles-pp.hrtimer_get_next_event
0.21 ± 7% +0.1 0.29 ± 3% perf-profile.self.cycles-pp.sock_def_readable
0.00 +0.1 0.11 ± 7% perf-profile.self.cycles-pp.cpumask_next
0.38 ± 6% +0.1 0.50 ± 7% perf-profile.self.cycles-pp.find_next_bit
0.00 +0.2 0.17 ± 4% perf-profile.self.cycles-pp.percpu_counter_add_batch
0.00 +1.1 1.07 ± 5% perf-profile.self.cycles-pp.__percpu_counter_sum
0.00 +4.2 4.19 ± 5% perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath





Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Rong Chen


Attachments:
(No filename) (140.76 kB)
config-4.19.0-rc2-00128-gb99259a (163.74 kB)
job-script (7.27 kB)
job.yaml (4.89 kB)
reproduce (1.29 kB)
Download all attachments