LinuxLists.cc - [PATCH v4] sock: add tracepoint for send recv length

2023-01-08 03:02:49

Subject: [PATCH v4] sock: add tracepoint for send recv length

Add 2 tracepoints to monitor the tcp/udp traffic
of per process and per cgroup.

Regarding monitoring the tcp/udp traffic of each process, there are two
existing solutions, the first one is https://www.atoptool.nl/netatop.php.
The second is via kprobe/kretprobe.

Netatop solution is implemented by registering the hook function at the
hook point provided by the netfilter framework.

These hook functions may be in the soft interrupt context and cannot
directly obtain the pid. Some data structures are added to bind packets
and processes. For example, struct taskinfobucket, struct taskinfo ...

Every time the process sends and receives packets it needs multiple
hashmaps,resulting in low performance and it has the problem fo inaccurate
tcp/udp traffic statistics(for example: multiple threads share sockets).

We can obtain the information with kretprobe, but as we know, kprobe gets
the result by trappig in an exception, which loses performance compared
to tracepoint.

We compared the performance of tracepoints with the above two methods, and
the results are as follows:

ab -n 1000000 -c 1000 -r http://127.0.0.1/index.html
without trace:
Time per request: 39.660 [ms] (mean)
Time per request: 0.040 [ms] (mean, across all concurrent requests)

netatop:
Time per request: 50.717 [ms] (mean)
Time per request: 0.051 [ms] (mean, across all concurrent requests)

kr:
Time per request: 43.168 [ms] (mean)
Time per request: 0.043 [ms] (mean, across all concurrent requests)

tracepoint:
Time per request: 41.004 [ms] (mean)
Time per request: 0.041 [ms] (mean, across all concurrent requests

It can be seen that tracepoint has better performance.

Signed-off-by: Yunhui Cui <[email protected]>
Signed-off-by: Xiongchun Duan <[email protected]>
---
include/trace/events/sock.h | 48 +++++++++++++++++++++++++++++++++++++
net/socket.c | 23 ++++++++++++++----
2 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 777ee6cbe933..d00a5b272404 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -263,6 +263,54 @@ TRACE_EVENT(inet_sk_error_report,
__entry->error)
);

+/*
+ * sock send/recv msg length
+ */
+DECLARE_EVENT_CLASS(sock_msg_length,
+
+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
+ int flags),
+
+ TP_ARGS(sk, family, protocol, ret, flags),
+
+ TP_STRUCT__entry(
+ __field(void *, sk)
+ __field(__u16, family)
+ __field(__u16, protocol)
+ __field(int, length)
+ __field(int, error)
+ __field(int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->family = sk->sk_family;
+ __entry->protocol = sk->sk_protocol;
+ __entry->length = ret > 0 ? ret : 0;
+ __entry->error = ret < 0 ? ret : 0;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("sk address = %p, family = %s protocol = %s, length = %d, error = %d, flags = 0x%x",
+ __entry->sk, show_family_name(__entry->family),
+ show_inet_protocol_name(__entry->protocol),
+ __entry->length,
+ __entry->error, __entry->flags)
+);
+
+DEFINE_EVENT(sock_msg_length, sock_send_length,
+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
+ int flags),
+
+ TP_ARGS(sk, family, protocol, ret, flags)
+);
+
+DEFINE_EVENT(sock_msg_length, sock_recv_length,
+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
+ int flags),
+
+ TP_ARGS(sk, family, protocol, ret, flags)
+);
#endif /* _TRACE_SOCK_H */

/* This part must be outside protection */
diff --git a/net/socket.c b/net/socket.c
index 888cd618a968..60a1ff95b4b1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
#include <linux/ptp_clock_kernel.h>
+#include <trace/events/sock.h>

#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -715,6 +716,9 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
+
+ trace_sock_send_length(sock->sk, sock->sk->sk_family,
+ sock->sk->sk_protocol, ret, 0);
return ret;
}

@@ -992,9 +996,15 @@ INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
- inet_recvmsg, sock, msg, msg_data_left(msg),
- flags);
+ int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ inet_recvmsg, sock, msg,
+ msg_data_left(msg), flags);
+
+ trace_sock_recv_length(sock->sk, sock->sk->sk_family,
+ sock->sk->sk_protocol,
+ !(flags & MSG_PEEK) ? ret :
+ (ret < 0 ? ret : 0), flags);
+ return ret;
}

/**
@@ -1044,6 +1054,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
{
struct socket *sock;
int flags;
+ int ret;

sock = file->private_data;

@@ -1051,7 +1062,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
flags |= more;

- return kernel_sendpage(sock, page, offset, size, flags);
+ ret = kernel_sendpage(sock, page, offset, size, flags);
+
+ trace_sock_send_length(sock->sk, sock->sk->sk_family,
+ sock->sk->sk_protocol, ret, 0);
+ return ret;
}

static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
--
2.20.1

2023-01-09 10:03:27

by Eric Dumazet

[permalink] [raw]

Subject: Re: [PATCH v4] sock: add tracepoint for send recv length

On Sun, Jan 8, 2023 at 3:56 AM Yunhui Cui <[email protected]> wrote:
>
> Add 2 tracepoints to monitor the tcp/udp traffic
> of per process and per cgroup.
>
> Regarding monitoring the tcp/udp traffic of each process, there are two
> existing solutions, the first one is https://www.atoptool.nl/netatop.php.
> The second is via kprobe/kretprobe.
>
> Netatop solution is implemented by registering the hook function at the
> hook point provided by the netfilter framework.
>
> These hook functions may be in the soft interrupt context and cannot
> directly obtain the pid. Some data structures are added to bind packets
> and processes. For example, struct taskinfobucket, struct taskinfo ...
>
> Every time the process sends and receives packets it needs multiple
> hashmaps,resulting in low performance and it has the problem fo inaccurate
> tcp/udp traffic statistics(for example: multiple threads share sockets).
>
> We can obtain the information with kretprobe, but as we know, kprobe gets
> the result by trappig in an exception, which loses performance compared
> to tracepoint.
>
> We compared the performance of tracepoints with the above two methods, and
> the results are as follows:
>
> ab -n 1000000 -c 1000 -r http://127.0.0.1/index.html
> without trace:
> Time per request: 39.660 [ms] (mean)
> Time per request: 0.040 [ms] (mean, across all concurrent requests)
>
> netatop:
> Time per request: 50.717 [ms] (mean)
> Time per request: 0.051 [ms] (mean, across all concurrent requests)
>
> kr:
> Time per request: 43.168 [ms] (mean)
> Time per request: 0.043 [ms] (mean, across all concurrent requests)
>
> tracepoint:
> Time per request: 41.004 [ms] (mean)
> Time per request: 0.041 [ms] (mean, across all concurrent requests
>
> It can be seen that tracepoint has better performance.
>
> Signed-off-by: Yunhui Cui <[email protected]>
> Signed-off-by: Xiongchun Duan <[email protected]>
> ---
> include/trace/events/sock.h | 48 +++++++++++++++++++++++++++++++++++++
> net/socket.c | 23 ++++++++++++++----
> 2 files changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
> index 777ee6cbe933..d00a5b272404 100644
> --- a/include/trace/events/sock.h
> +++ b/include/trace/events/sock.h
> @@ -263,6 +263,54 @@ TRACE_EVENT(inet_sk_error_report,
> __entry->error)
> );
>
> +/*
> + * sock send/recv msg length
> + */
> +DECLARE_EVENT_CLASS(sock_msg_length,
> +
> + TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
> + int flags),
> +
> + TP_ARGS(sk, family, protocol, ret, flags),
> +
> + TP_STRUCT__entry(
> + __field(void *, sk)
> + __field(__u16, family)
> + __field(__u16, protocol)
> + __field(int, length)
> + __field(int, error)
> + __field(int, flags)
> + ),
> +
> + TP_fast_assign(
> + __entry->sk = sk;
> + __entry->family = sk->sk_family;
> + __entry->protocol = sk->sk_protocol;
> + __entry->length = ret > 0 ? ret : 0;
> + __entry->error = ret < 0 ? ret : 0;
> + __entry->flags = flags;
> + ),
> +
> + TP_printk("sk address = %p, family = %s protocol = %s, length = %d, error = %d, flags = 0x%x",
> + __entry->sk, show_family_name(__entry->family),
> + show_inet_protocol_name(__entry->protocol),
> + __entry->length,
> + __entry->error, __entry->flags)
> +);
> +
> +DEFINE_EVENT(sock_msg_length, sock_send_length,
> + TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
> + int flags),
> +
> + TP_ARGS(sk, family, protocol, ret, flags)
> +);
> +
> +DEFINE_EVENT(sock_msg_length, sock_recv_length,
> + TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
> + int flags),
> +
> + TP_ARGS(sk, family, protocol, ret, flags)
> +);
> #endif /* _TRACE_SOCK_H */
>
> /* This part must be outside protection */
> diff --git a/net/socket.c b/net/socket.c
> index 888cd618a968..60a1ff95b4b1 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -106,6 +106,7 @@
> #include <net/busy_poll.h>
> #include <linux/errqueue.h>
> #include <linux/ptp_clock_kernel.h>
> +#include <trace/events/sock.h>
>
> #ifdef CONFIG_NET_RX_BUSY_POLL
> unsigned int sysctl_net_busy_read __read_mostly;
> @@ -715,6 +716,9 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
> inet_sendmsg, sock, msg,
> msg_data_left(msg));
> BUG_ON(ret == -EIOCBQUEUED);
> +
> + trace_sock_send_length(sock->sk, sock->sk->sk_family,
> + sock->sk->sk_protocol, ret, 0);

Note: At least for CONFIG_RETPOLINE=y and gcc 12.2, compiler adds many
additional instructions (and additional memory reads),
even when the trace point is not enabled.

Contrary to some belief, adding a tracepoint is not always 'free'.
tail calls for example are replaced with normal calls.

sock_recvmsg_nosec:
pushq %r12 #
movl %edx, %r12d # tmp123, flags
pushq %rbp #
# net/socket.c:999: int ret =
INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
movl %r12d, %ecx # flags,
# net/socket.c:998: {
movq %rdi, %rbp # tmp121, sock
pushq %rbx #
# net/socket.c:999: int ret =
INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
movq 32(%rdi), %rax # sock_19(D)->ops, sock_19(D)->ops
# ./include/linux/uio.h:270: return i->count;
movq 32(%rsi), %rdx # MEM[(const struct iov_iter
*)msg_20(D) + 16B].count, pretmp_48
# net/socket.c:999: int ret =
INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
movq 144(%rax), %rax # _1->recvmsg, _2
cmpq $inet6_recvmsg, %rax #, _2
jne .L107 #,
call inet6_recvmsg #
movl %eax, %ebx # tmp124, <retval>
.L108:
# net/socket.c:1003: trace_sock_recv_length(sock->sk, sock->sk->sk_family,
xorl %r8d, %r8d # tmp127
testl %ebx, %ebx # <retval>
# net/socket.c:1004: sock->sk->sk_protocol,
movq 24(%rbp), %rsi # sock_19(D)->sk, _10
# net/socket.c:1003: trace_sock_recv_length(sock->sk, sock->sk->sk_family,
cmovle %ebx, %r8d # <retval>,, tmp119
testb $2, %r12b #, flags
# net/socket.c:1004: sock->sk->sk_protocol,
movzwl 516(%rsi), %ecx # _10->sk_protocol,
# net/socket.c:1003: trace_sock_recv_length(sock->sk, sock->sk->sk_family,
movzwl 16(%rsi), %edx # _10->__sk_common.skc_family,
# net/socket.c:1003: trace_sock_recv_length(sock->sk, sock->sk->sk_family,
cmove %ebx, %r8d # tmp119,, <retval>, iftmp.54_16
# ./arch/x86/include/asm/jump_label.h:27: asm_volatile_goto("1:"
#APP
# 27 "./arch/x86/include/asm/jump_label.h" 1
1:jmp .L111 # objtool NOPs this #
.pushsection __jump_table, "aw"
.balign 8
.long 1b - .
.long .L111 - . #
.quad __tracepoint_sock_recv_length+8 + 2 - . #,
.popsection

# 0 "" 2
#NO_APP
.L106:
# net/socket.c:1008: }
movl %ebx, %eax # <retval>,
popq %rbx #
popq %rbp #
popq %r12 #
ret
.L111:
# ./include/trace/events/sock.h:308: DEFINE_EVENT(sock_msg_length,
sock_recv_length,

> return ret;
> }
>
> @@ -992,9 +996,15 @@ INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
> static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
> int flags)
> {
> - return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
> - inet_recvmsg, sock, msg, msg_data_left(msg),
> - flags);
> + int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
> + inet_recvmsg, sock, msg,
> + msg_data_left(msg), flags);
> +
> + trace_sock_recv_length(sock->sk, sock->sk->sk_family,
> + sock->sk->sk_protocol,
> + !(flags & MSG_PEEK) ? ret :
> + (ret < 0 ? ret : 0), flags);
> + return ret;
> }
>
> /**
> @@ -1044,6 +1054,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
> {
> struct socket *sock;
> int flags;
> + int ret;
>
> sock = file->private_data;
>
> @@ -1051,7 +1062,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
> /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
> flags |= more;
>
> - return kernel_sendpage(sock, page, offset, size, flags);
> + ret = kernel_sendpage(sock, page, offset, size, flags);
> +
> + trace_sock_send_length(sock->sk, sock->sk->sk_family,
> + sock->sk->sk_protocol, ret, 0);
> + return ret;
> }
>
> static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
> --
> 2.20.1
>

2023-01-09 13:40:30

by yunhui cui

[permalink] [raw]

Subject: Re: [External] Re: [PATCH v4] sock: add tracepoint for send recv length

On Mon, Jan 9, 2023 at 5:56 PM Eric Dumazet <[email protected]> wrote:

>
> Note: At least for CONFIG_RETPOLINE=y and gcc 12.2, compiler adds many
> additional instructions (and additional memory reads),
> even when the trace point is not enabled.
>
> Contrary to some belief, adding a tracepoint is not always 'free'.
> tail calls for example are replaced with normal calls.
>

> .popsection
>
> # 0 "" 2
> #NO_APP
> .L106:
> # net/socket.c:1008: }
> movl %ebx, %eax # <retval>,
> popq %rbx #
> popq %rbp #
> popq %r12 #
> ret
> .L111:
> # ./include/trace/events/sock.h:308: DEFINE_EVENT(sock_msg_length,
> sock_recv_length,
>

Hi Eric, Thanks for your reply, In fact, it is because the
definition of the tracepoint function is inline,
Not just these two tracepoints，right?

#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
...
static inline void trace_##name(proto)

Regarding the above issue, I plan to optimize it like this:

static noinline void call_trace_sock_send_length(struct sock *sk, __u16 family,
__u16 protocol, int ret, int flags)
{
trace_sock_send_length(sk, family, protocol, ret, 0);
}

static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);

if (trace_sock_send_length_enabled()) {
call_trace_sock_send_length(sock->sk, sock->sk->sk_family,
sock->sk->sk_protocol, ret, 0);
}
return ret;
}

What do you think?

Thanks,
Yunhui

2023-01-09 15:32:43

by Eric Dumazet

[permalink] [raw]

Subject: Re: [External] Re: [PATCH v4] sock: add tracepoint for send recv length

On Mon, Jan 9, 2023 at 2:13 PM 运辉崔 <[email protected]> wrote:
>
> On Mon, Jan 9, 2023 at 5:56 PM Eric Dumazet <[email protected]> wrote:
>
> >
> > Note: At least for CONFIG_RETPOLINE=y and gcc 12.2, compiler adds many
> > additional instructions (and additional memory reads),
> > even when the trace point is not enabled.
> >
> > Contrary to some belief, adding a tracepoint is not always 'free'.
> > tail calls for example are replaced with normal calls.
> >
>
>
> > .popsection
> >
> > # 0 "" 2
> > #NO_APP
> > .L106:
> > # net/socket.c:1008: }
> > movl %ebx, %eax # <retval>,
> > popq %rbx #
> > popq %rbp #
> > popq %r12 #
> > ret
> > .L111:
> > # ./include/trace/events/sock.h:308: DEFINE_EVENT(sock_msg_length,
> > sock_recv_length,
> >
>
> Hi Eric, Thanks for your reply, In fact, it is because the
> definition of the tracepoint function is inline,
> Not just these two tracepoints，right?
>
> #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
> ...
> static inline void trace_##name(proto)
>
> Regarding the above issue, I plan to optimize it like this:
>
> static noinline void call_trace_sock_send_length(struct sock *sk, __u16 family,
> __u16 protocol, int ret, int flags)
> {
> trace_sock_send_length(sk, family, protocol, ret, 0);
> }
>
> static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
> {
> int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
> inet_sendmsg, sock, msg,
> msg_data_left(msg));
> BUG_ON(ret == -EIOCBQUEUED);
>
> if (trace_sock_send_length_enabled()) {

A barrier() is needed here, with the current state of affairs.

IMO, ftrace/x86 experts should take care of this generic issue ?

> call_trace_sock_send_length(sock->sk, sock->sk->sk_family,
> sock->sk->sk_protocol, ret, 0);
> }
> return ret;
> }
>
> What do you think?
>
> Thanks,
> Yunhui

2023-01-10 03:49:20

by Dust Li

[permalink] [raw]

Subject: Re: [PATCH v4] sock: add tracepoint for send recv length

On Sun, Jan 08, 2023 at 10:55:45AM +0800, Yunhui Cui wrote:
>Add 2 tracepoints to monitor the tcp/udp traffic
>of per process and per cgroup.
>
>Regarding monitoring the tcp/udp traffic of each process, there are two
>existing solutions, the first one is https://www.atoptool.nl/netatop.php.
>The second is via kprobe/kretprobe.
>
>Netatop solution is implemented by registering the hook function at the
>hook point provided by the netfilter framework.
>
>These hook functions may be in the soft interrupt context and cannot
>directly obtain the pid. Some data structures are added to bind packets
>and processes. For example, struct taskinfobucket, struct taskinfo ...
>
>Every time the process sends and receives packets it needs multiple
>hashmaps,resulting in low performance and it has the problem fo inaccurate
>tcp/udp traffic statistics(for example: multiple threads share sockets).
>
>We can obtain the information with kretprobe, but as we know, kprobe gets
>the result by trappig in an exception, which loses performance compared
>to tracepoint.
>
>We compared the performance of tracepoints with the above two methods, and
>the results are as follows:
>
>ab -n 1000000 -c 1000 -r http://127.0.0.1/index.html

AFAIK, ab are relatively slow compared to some network benchmarks since
it's a http benchmark.
Can you test other benchmarks like sockperf or redis-benchmark with small
packets, and check the PPS drop ? Those benchmarks should have larger PPS.

Like Eric pointed out, those tracepoints in the datapath are not free,
its better to make sure we don't get a noticeable PPS drop after adding
those tracepoints.

Thanks.

>without trace:
>Time per request: 39.660 [ms] (mean)
>Time per request: 0.040 [ms] (mean, across all concurrent requests)
>
>netatop:
>Time per request: 50.717 [ms] (mean)
>Time per request: 0.051 [ms] (mean, across all concurrent requests)
>
>kr:
>Time per request: 43.168 [ms] (mean)
>Time per request: 0.043 [ms] (mean, across all concurrent requests)
>
>tracepoint:
>Time per request: 41.004 [ms] (mean)
>Time per request: 0.041 [ms] (mean, across all concurrent requests
>
>It can be seen that tracepoint has better performance.
>
>Signed-off-by: Yunhui Cui <[email protected]>
>Signed-off-by: Xiongchun Duan <[email protected]>
>---
> include/trace/events/sock.h | 48 +++++++++++++++++++++++++++++++++++++
> net/socket.c | 23 ++++++++++++++----
> 2 files changed, 67 insertions(+), 4 deletions(-)
>
>diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
>index 777ee6cbe933..d00a5b272404 100644
>--- a/include/trace/events/sock.h
>+++ b/include/trace/events/sock.h
>@@ -263,6 +263,54 @@ TRACE_EVENT(inet_sk_error_report,
> __entry->error)
> );
>
>+/*
>+ * sock send/recv msg length
>+ */
>+DECLARE_EVENT_CLASS(sock_msg_length,
>+
>+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
>+ int flags),
>+
>+ TP_ARGS(sk, family, protocol, ret, flags),
>+
>+ TP_STRUCT__entry(
>+ __field(void *, sk)
>+ __field(__u16, family)
>+ __field(__u16, protocol)
>+ __field(int, length)
>+ __field(int, error)
>+ __field(int, flags)
>+ ),
>+
>+ TP_fast_assign(
>+ __entry->sk = sk;
>+ __entry->family = sk->sk_family;
>+ __entry->protocol = sk->sk_protocol;
>+ __entry->length = ret > 0 ? ret : 0;
>+ __entry->error = ret < 0 ? ret : 0;
>+ __entry->flags = flags;
>+ ),
>+
>+ TP_printk("sk address = %p, family = %s protocol = %s, length = %d, error = %d, flags = 0x%x",
>+ __entry->sk, show_family_name(__entry->family),
>+ show_inet_protocol_name(__entry->protocol),
>+ __entry->length,
>+ __entry->error, __entry->flags)
>+);
>+
>+DEFINE_EVENT(sock_msg_length, sock_send_length,
>+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
>+ int flags),
>+
>+ TP_ARGS(sk, family, protocol, ret, flags)
>+);
>+
>+DEFINE_EVENT(sock_msg_length, sock_recv_length,
>+ TP_PROTO(struct sock *sk, __u16 family, __u16 protocol, int ret,
>+ int flags),
>+
>+ TP_ARGS(sk, family, protocol, ret, flags)
>+);
> #endif /* _TRACE_SOCK_H */
>
> /* This part must be outside protection */
>diff --git a/net/socket.c b/net/socket.c
>index 888cd618a968..60a1ff95b4b1 100644
>--- a/net/socket.c
>+++ b/net/socket.c
>@@ -106,6 +106,7 @@
> #include <net/busy_poll.h>
> #include <linux/errqueue.h>
> #include <linux/ptp_clock_kernel.h>
>+#include <trace/events/sock.h>
>
> #ifdef CONFIG_NET_RX_BUSY_POLL
> unsigned int sysctl_net_busy_read __read_mostly;
>@@ -715,6 +716,9 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
> inet_sendmsg, sock, msg,
> msg_data_left(msg));
> BUG_ON(ret == -EIOCBQUEUED);
>+
>+ trace_sock_send_length(sock->sk, sock->sk->sk_family,
>+ sock->sk->sk_protocol, ret, 0);
> return ret;
> }
>
>@@ -992,9 +996,15 @@ INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
> static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
> int flags)
> {
>- return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
>- inet_recvmsg, sock, msg, msg_data_left(msg),
>- flags);
>+ int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
>+ inet_recvmsg, sock, msg,
>+ msg_data_left(msg), flags);
>+
>+ trace_sock_recv_length(sock->sk, sock->sk->sk_family,
>+ sock->sk->sk_protocol,
>+ !(flags & MSG_PEEK) ? ret :
>+ (ret < 0 ? ret : 0), flags);
>+ return ret;
> }
>
> /**
>@@ -1044,6 +1054,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
> {
> struct socket *sock;
> int flags;
>+ int ret;
>
> sock = file->private_data;
>
>@@ -1051,7 +1062,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
> /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
> flags |= more;
>
>- return kernel_sendpage(sock, page, offset, size, flags);
>+ ret = kernel_sendpage(sock, page, offset, size, flags);
>+
>+ trace_sock_send_length(sock->sk, sock->sk->sk_family,
>+ sock->sk->sk_protocol, ret, 0);
>+ return ret;
> }
>
> static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
>--
>2.20.1