Hi,
According to the discussion in the mail thread
https://patchwork.kernel.org/patch/10099243/,
the tcp_set_state tracepoint is renamed to the sock_set_state tracepoint and
moved to include/trace/events/sock.h.
This new tracepoint is then used to trace TCP/DCCP/SCTP state transitions.
v1->v2: Steven's patch is included in this series.
Steven Rostedt (1):
tcp: Export to userspace the TCP state names for the trace events
Yafang Shao (3):
net: tracepoint: replace tcp_set_state tracepoint with sock_set_state
tracepoint
net: tracepoint: using sock_set_state tracepoint to trace SCTP state
transition
net: tracepoint: using sock_set_state tracepoint to trace DCCP state
transition
include/net/sock.h | 15 +-----
include/trace/events/sock.h | 106 ++++++++++++++++++++++++++++++++++++++++
include/trace/events/tcp.h | 76 ----------------------------
net/core/sock.c | 13 +++++
net/dccp/proto.c | 2 +-
net/ipv4/inet_connection_sock.c | 4 +-
net/ipv4/inet_hashtables.c | 2 +-
net/ipv4/tcp.c | 4 --
net/sctp/endpointola.c | 2 +-
net/sctp/sm_sideeffect.c | 4 +-
net/sctp/socket.c | 12 ++---
11 files changed, 134 insertions(+), 106 deletions(-)
--
1.8.3.1
From: "Steven Rostedt (VMware)" <[email protected]>
The TCP trace events (specifically tcp_set_state) map enums to symbol
names via __print_symbolic(). But this only works for reading trace events
from the tracefs trace files. If perf or trace-cmd were to record these
events, the event format file does not convert the enum names into numbers,
and you get something like:
__print_symbolic(REC->oldstate,
{ TCP_ESTABLISHED, "TCP_ESTABLISHED" },
{ TCP_SYN_SENT, "TCP_SYN_SENT" },
{ TCP_SYN_RECV, "TCP_SYN_RECV" },
{ TCP_FIN_WAIT1, "TCP_FIN_WAIT1" },
{ TCP_FIN_WAIT2, "TCP_FIN_WAIT2" },
{ TCP_TIME_WAIT, "TCP_TIME_WAIT" },
{ TCP_CLOSE, "TCP_CLOSE" },
{ TCP_CLOSE_WAIT, "TCP_CLOSE_WAIT" },
{ TCP_LAST_ACK, "TCP_LAST_ACK" },
{ TCP_LISTEN, "TCP_LISTEN" },
{ TCP_CLOSING, "TCP_CLOSING" },
{ TCP_NEW_SYN_RECV, "TCP_NEW_SYN_RECV" })
Where trace-cmd and perf do not know the values of those enums.
Use the TRACE_DEFINE_ENUM() macros that will have the trace events convert
the enum strings into their values at system boot. This will allow perf and
trace-cmd to see actual numbers and not enums:
__print_symbolic(REC->oldstate,
{ 1, "TCP_ESTABLISHED" },
{ 2, "TCP_SYN_SENT" },
{ 3, "TCP_SYN_RECV" },
{ 4, "TCP_FIN_WAIT1" },
{ 5, "TCP_FIN_WAIT2" },
{ 6, "TCP_TIME_WAIT" },
{ 7, "TCP_CLOSE" },
{ 8, "TCP_CLOSE_WAIT" },
{ 9, "TCP_LAST_ACK" },
{ 10, "TCP_LISTEN" },
{ 11, "TCP_CLOSING" },
{ 12, "TCP_NEW_SYN_RECV" })
Signed-off-by: Steven Rostedt (VMware) <[email protected]>
Acked-by: Song Liu <[email protected]>
Signed-off-by: Yafang Shao <[email protected]>
---
include/trace/events/tcp.h | 41 ++++++++++++++++++++++++++++-------------
1 file changed, 28 insertions(+), 13 deletions(-)
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca..40240ac 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,21 +9,36 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>
+#define tcp_state_names \
+ EM(TCP_ESTABLISHED) \
+ EM(TCP_SYN_SENT) \
+ EM(TCP_SYN_RECV) \
+ EM(TCP_FIN_WAIT1) \
+ EM(TCP_FIN_WAIT2) \
+ EM(TCP_TIME_WAIT) \
+ EM(TCP_CLOSE) \
+ EM(TCP_CLOSE_WAIT) \
+ EM(TCP_LAST_ACK) \
+ EM(TCP_LISTEN) \
+ EM(TCP_CLOSING) \
+ EMe(TCP_NEW_SYN_RECV) \
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a) TRACE_DEFINE_ENUM(a);
+#define EMe(a) TRACE_DEFINE_ENUM(a);
+
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a) tcp_state_name(a),
+#define EMe(a) tcp_state_name(a)
+
#define tcp_state_name(state) { state, #state }
#define show_tcp_state_name(val) \
- __print_symbolic(val, \
- tcp_state_name(TCP_ESTABLISHED), \
- tcp_state_name(TCP_SYN_SENT), \
- tcp_state_name(TCP_SYN_RECV), \
- tcp_state_name(TCP_FIN_WAIT1), \
- tcp_state_name(TCP_FIN_WAIT2), \
- tcp_state_name(TCP_TIME_WAIT), \
- tcp_state_name(TCP_CLOSE), \
- tcp_state_name(TCP_CLOSE_WAIT), \
- tcp_state_name(TCP_LAST_ACK), \
- tcp_state_name(TCP_LISTEN), \
- tcp_state_name(TCP_CLOSING), \
- tcp_state_name(TCP_NEW_SYN_RECV))
+ __print_symbolic(val, tcp_state_names)
/*
* tcp event with arguments sk and skb
--
1.8.3.1
As sk_state is a common field of struct sock, the state
transition tracepoint should not be a TCP-specific feature.
So I rename the tcp_set_state tracepoint to sock_set_state, with
some minor changes, and move it into trace/events/sock.h.
Two helpers are introduced to trace sk_state transitions:
- void sk_state_store(struct sock *sk, int state);
- void sk_set_state(struct sock *sk, int state);
As trace headers should not be included in other header files,
the helpers are defined in sock.c.
Protocols such as SCTP may be compiled as a module (.ko), hence
sk_set_state() is exported.
Signed-off-by: Yafang Shao <[email protected]>
---
include/net/sock.h | 15 +-----
include/trace/events/sock.h | 106 ++++++++++++++++++++++++++++++++++++++++
include/trace/events/tcp.h | 91 ----------------------------------
net/core/sock.c | 13 +++++
net/ipv4/inet_connection_sock.c | 4 +-
net/ipv4/inet_hashtables.c | 2 +-
net/ipv4/tcp.c | 4 --
7 files changed, 124 insertions(+), 111 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 9a90472..988ce82 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2344,19 +2344,8 @@ static inline int sk_state_load(const struct sock *sk)
return smp_load_acquire(&sk->sk_state);
}
-/**
- * sk_state_store - update sk->sk_state
- * @sk: socket pointer
- * @newstate: new state
- *
- * Paired with sk_state_load(). Should be used in contexts where
- * state change might impact lockless readers.
- */
-static inline void sk_state_store(struct sock *sk, int newstate)
-{
- smp_store_release(&sk->sk_state, newstate);
-}
-
+void sk_state_store(struct sock *sk, int newstate);
+void sk_set_state(struct sock *sk, int state);
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index ec4dade..61977e5 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -6,7 +6,49 @@
#define _TRACE_SOCK_H
#include <net/sock.h>
+#include <net/ipv6.h>
#include <linux/tracepoint.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#define inet_protocol_names \
+ EM(IPPROTO_TCP) \
+ EM(IPPROTO_DCCP) \
+ EMe(IPPROTO_SCTP)
+
+#define tcp_state_names \
+ EM(TCP_ESTABLISHED) \
+ EM(TCP_SYN_SENT) \
+ EM(TCP_SYN_RECV) \
+ EM(TCP_FIN_WAIT1) \
+ EM(TCP_FIN_WAIT2) \
+ EM(TCP_TIME_WAIT) \
+ EM(TCP_CLOSE) \
+ EM(TCP_CLOSE_WAIT) \
+ EM(TCP_LAST_ACK) \
+ EM(TCP_LISTEN) \
+ EM(TCP_CLOSING) \
+ EMe(TCP_NEW_SYN_RECV)
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a) TRACE_DEFINE_ENUM(a);
+#define EMe(a) TRACE_DEFINE_ENUM(a);
+
+inet_protocol_names
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a) { a, #a },
+#define EMe(a) { a, #a }
+
+#define show_inet_protocol_name(val) \
+ __print_symbolic(val, inet_protocol_names)
+
+#define show_tcp_state_name(val) \
+ __print_symbolic(val, tcp_state_names)
TRACE_EVENT(sock_rcvqueue_full,
@@ -63,6 +105,70 @@
__entry->rmem_alloc)
);
+TRACE_EVENT(sock_set_state,
+
+ TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
+
+ TP_ARGS(sk, oldstate, newstate),
+
+ TP_STRUCT__entry(
+ __field(const void *, skaddr)
+ __field(int, oldstate)
+ __field(int, newstate)
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __field(__u8, protocol)
+ __array(__u8, saddr, 4)
+ __array(__u8, daddr, 4)
+ __array(__u8, saddr_v6, 16)
+ __array(__u8, daddr_v6, 16)
+ ),
+
+ TP_fast_assign(
+ struct inet_sock *inet = inet_sk(sk);
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __entry->skaddr = sk;
+ __entry->oldstate = oldstate;
+ __entry->newstate = newstate;
+
+ __entry->protocol = sk->sk_protocol;
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = inet->inet_saddr;
+
+ p32 = (__be32 *) __entry->daddr;
+ *p32 = inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ *pin6 = sk->sk_v6_rcv_saddr;
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ *pin6 = sk->sk_v6_daddr;
+ } else
+#endif
+ {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+ }
+ ),
+
+ TP_printk("protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4"
+ "saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
+ show_inet_protocol_name(__entry->protocol),
+ __entry->sport, __entry->dport,
+ __entry->saddr, __entry->daddr,
+ __entry->saddr_v6, __entry->daddr_v6,
+ show_tcp_state_name(__entry->oldstate),
+ show_tcp_state_name(__entry->newstate))
+);
+
#endif /* _TRACE_SOCK_H */
/* This part must be outside protection */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 40240ac..7399399 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,37 +9,6 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>
-#define tcp_state_names \
- EM(TCP_ESTABLISHED) \
- EM(TCP_SYN_SENT) \
- EM(TCP_SYN_RECV) \
- EM(TCP_FIN_WAIT1) \
- EM(TCP_FIN_WAIT2) \
- EM(TCP_TIME_WAIT) \
- EM(TCP_CLOSE) \
- EM(TCP_CLOSE_WAIT) \
- EM(TCP_LAST_ACK) \
- EM(TCP_LISTEN) \
- EM(TCP_CLOSING) \
- EMe(TCP_NEW_SYN_RECV) \
-
-/* enums need to be exported to user space */
-#undef EM
-#undef EMe
-#define EM(a) TRACE_DEFINE_ENUM(a);
-#define EMe(a) TRACE_DEFINE_ENUM(a);
-
-tcp_state_names
-
-#undef EM
-#undef EMe
-#define EM(a) tcp_state_name(a),
-#define EMe(a) tcp_state_name(a)
-
-#define tcp_state_name(state) { state, #state }
-#define show_tcp_state_name(val) \
- __print_symbolic(val, tcp_state_names)
-
/*
* tcp event with arguments sk and skb
*
@@ -192,66 +161,6 @@
TP_ARGS(sk)
);
-TRACE_EVENT(tcp_set_state,
-
- TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
-
- TP_ARGS(sk, oldstate, newstate),
-
- TP_STRUCT__entry(
- __field(const void *, skaddr)
- __field(int, oldstate)
- __field(int, newstate)
- __field(__u16, sport)
- __field(__u16, dport)
- __array(__u8, saddr, 4)
- __array(__u8, daddr, 4)
- __array(__u8, saddr_v6, 16)
- __array(__u8, daddr_v6, 16)
- ),
-
- TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
- struct in6_addr *pin6;
- __be32 *p32;
-
- __entry->skaddr = sk;
- __entry->oldstate = oldstate;
- __entry->newstate = newstate;
-
- __entry->sport = ntohs(inet->inet_sport);
- __entry->dport = ntohs(inet->inet_dport);
-
- p32 = (__be32 *) __entry->saddr;
- *p32 = inet->inet_saddr;
-
- p32 = (__be32 *) __entry->daddr;
- *p32 = inet->inet_daddr;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- pin6 = (struct in6_addr *)__entry->saddr_v6;
- *pin6 = sk->sk_v6_rcv_saddr;
- pin6 = (struct in6_addr *)__entry->daddr_v6;
- *pin6 = sk->sk_v6_daddr;
- } else
-#endif
- {
- pin6 = (struct in6_addr *)__entry->saddr_v6;
- ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
- pin6 = (struct in6_addr *)__entry->daddr_v6;
- ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
- }
- ),
-
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
- __entry->sport, __entry->dport,
- __entry->saddr, __entry->daddr,
- __entry->saddr_v6, __entry->daddr_v6,
- show_tcp_state_name(__entry->oldstate),
- show_tcp_state_name(__entry->newstate))
-);
-
TRACE_EVENT(tcp_retransmit_synack,
TP_PROTO(const struct sock *sk, const struct request_sock *req),
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f..717f7f6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2859,6 +2859,19 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
}
EXPORT_SYMBOL(sock_get_timestampns);
+void sk_state_store(struct sock *sk, int state)
+{
+ trace_sock_set_state(sk, sk->sk_state, state);
+ smp_store_release(&sk->sk_state, state);
+}
+
+void sk_set_state(struct sock *sk, int state)
+{
+ trace_sock_set_state(sk, sk->sk_state, state);
+ sk->sk_state = state;
+}
+EXPORT_SYMBOL(sk_set_state);
+
void sock_enable_timestamp(struct sock *sk, int flag)
{
if (!sock_flag(sk, flag)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc..001f7b0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
- newsk->sk_state = TCP_SYN_RECV;
+ sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
return 0;
}
- sk->sk_state = TCP_CLOSE;
+ sk_set_state(sk, TCP_CLOSE);
return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f5810..5973693 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
} else {
percpu_counter_inc(sk->sk_prot->orphan_count);
- sk->sk_state = TCP_CLOSE;
+ sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c470fec..df6da92 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,8 +283,6 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>
-#include <trace/events/tcp.h>
-
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -2040,8 +2038,6 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
- trace_tcp_set_state(sk, oldstate, state);
-
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
--
1.8.3.1
With changes in inet_ files, DCCP state transitions are traced with
sock_set_state tracepoint.
Signed-off-by: Yafang Shao <[email protected]>
---
net/dccp/proto.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9d43c1f..2874faf 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -110,7 +110,7 @@ void dccp_set_state(struct sock *sk, const int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk->sk_state = state;
+ sk_set_state(sk, state);
}
EXPORT_SYMBOL_GPL(dccp_set_state);
--
1.8.3.1
With the changes in the inet_ files, SCTP state transitions are traced with
the sock_set_state tracepoint.
Signed-off-by: Yafang Shao <[email protected]>
---
net/sctp/endpointola.c | 2 +-
net/sctp/sm_sideeffect.c | 4 ++--
net/sctp/socket.c | 12 ++++++------
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ee1e601..5e129df 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -232,7 +232,7 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
{
ep->base.dead = true;
- ep->base.sk->sk_state = SCTP_SS_CLOSED;
+ sk_set_state(ep->base.sk, SCTP_SS_CLOSED);
/* Unlink this endpoint, so we can't find it again! */
sctp_unhash_endpoint(ep);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 8adde71..22ab3b4 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -878,12 +878,12 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
* successfully completed a connect() call.
*/
if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
- sk->sk_state = SCTP_SS_ESTABLISHED;
+ sk_set_state(sk, SCTP_SS_ESTABLISHED);
/* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
sctp_sstate(sk, ESTABLISHED)) {
- sk->sk_state = SCTP_SS_CLOSING;
+ sk_set_state(sk, SCTP_SS_CLOSING);
sk->sk_shutdown |= RCV_SHUTDOWN;
}
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7eec0a0..ecb532c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1544,7 +1544,7 @@ static void sctp_close(struct sock *sk, long timeout)
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
sk->sk_shutdown = SHUTDOWN_MASK;
- sk->sk_state = SCTP_SS_CLOSING;
+ sk_set_state(sk, SCTP_SS_CLOSING);
ep = sctp_sk(sk)->ep;
@@ -4653,7 +4653,7 @@ static void sctp_shutdown(struct sock *sk, int how)
if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
struct sctp_association *asoc;
- sk->sk_state = SCTP_SS_CLOSING;
+ sk_set_state(sk, SCTP_SS_CLOSING);
asoc = list_entry(ep->asocs.next,
struct sctp_association, asocs);
sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@ -7509,13 +7509,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
* sockets.
*
*/
- sk->sk_state = SCTP_SS_LISTENING;
+ sk_set_state(sk, SCTP_SS_LISTENING);
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
} else {
if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
- sk->sk_state = SCTP_SS_CLOSED;
+ sk_set_state(sk, SCTP_SS_CLOSED);
return -EADDRINUSE;
}
}
@@ -8538,10 +8538,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* is called, set RCV_SHUTDOWN flag.
*/
if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
- newsk->sk_state = SCTP_SS_CLOSED;
+ sk_set_state(newsk, SCTP_SS_CLOSED);
newsk->sk_shutdown |= RCV_SHUTDOWN;
} else {
- newsk->sk_state = SCTP_SS_ESTABLISHED;
+ sk_set_state(newsk, SCTP_SS_ESTABLISHED);
}
release_sock(newsk);
--
1.8.3.1
> On Dec 15, 2017, at 9:56 AM, Yafang Shao <[email protected]> wrote:
>
> As sk_state is a common field for struct sock, so the state
> transition should not be a TCP specific feature.
> So I rename tcp_set_state tracepoint to sock_set_state tracepoint with
> some minor changes and move it into file trace/events/sock.h.
>
> Two helpers are introduced to trace sk_state transition
> - void sk_state_store(struct sock *sk, int state);
> - void sk_set_state(struct sock *sk, int state);
> As trace header should not be included in other header files,
> so they are defined in sock.c.
>
> The protocol such as SCTP maybe compiled as a ko, hence export
> sk_set_state().
>
> Signed-off-by: Yafang Shao <[email protected]>
> ---
> include/net/sock.h | 15 +-----
> include/trace/events/sock.h | 106 ++++++++++++++++++++++++++++++++++++++++
> include/trace/events/tcp.h | 91 ----------------------------------
> net/core/sock.c | 13 +++++
> net/ipv4/inet_connection_sock.c | 4 +-
> net/ipv4/inet_hashtables.c | 2 +-
> net/ipv4/tcp.c | 4 --
> 7 files changed, 124 insertions(+), 111 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 9a90472..988ce82 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2344,19 +2344,8 @@ static inline int sk_state_load(const struct sock *sk)
> return smp_load_acquire(&sk->sk_state);
> }
>
> -/**
> - * sk_state_store - update sk->sk_state
> - * @sk: socket pointer
> - * @newstate: new state
> - *
> - * Paired with sk_state_load(). Should be used in contexts where
> - * state change might impact lockless readers.
> - */
> -static inline void sk_state_store(struct sock *sk, int newstate)
> -{
> - smp_store_release(&sk->sk_state, newstate);
> -}
> -
> +void sk_state_store(struct sock *sk, int newstate);
> +void sk_set_state(struct sock *sk, int state);
> void sock_enable_timestamp(struct sock *sk, int flag);
> int sock_get_timestamp(struct sock *, struct timeval __user *);
> int sock_get_timestampns(struct sock *, struct timespec __user *);
> diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
> index ec4dade..61977e5 100644
> --- a/include/trace/events/sock.h
> +++ b/include/trace/events/sock.h
> @@ -6,7 +6,49 @@
> #define _TRACE_SOCK_H
>
> #include <net/sock.h>
> +#include <net/ipv6.h>
> #include <linux/tracepoint.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
> +
> +#define inet_protocol_names \
> + EM(IPPROTO_TCP) \
> + EM(IPPROTO_DCCP) \
> + EMe(IPPROTO_SCTP)
> +
> +#define tcp_state_names \
> + EM(TCP_ESTABLISHED) \
> + EM(TCP_SYN_SENT) \
> + EM(TCP_SYN_RECV) \
> + EM(TCP_FIN_WAIT1) \
> + EM(TCP_FIN_WAIT2) \
> + EM(TCP_TIME_WAIT) \
> + EM(TCP_CLOSE) \
> + EM(TCP_CLOSE_WAIT) \
> + EM(TCP_LAST_ACK) \
> + EM(TCP_LISTEN) \
> + EM(TCP_CLOSING) \
> + EMe(TCP_NEW_SYN_RECV)
Please keep these backslashes aligned.
> +/* enums need to be exported to user space */
> +#undef EM
> +#undef EMe
> +#define EM(a) TRACE_DEFINE_ENUM(a);
> +#define EMe(a) TRACE_DEFINE_ENUM(a);
> +
> +inet_protocol_names
> +tcp_state_names
> +
> +#undef EM
> +#undef EMe
> +#define EM(a) { a, #a },
> +#define EMe(a) { a, #a }
> +
> +#define show_inet_protocol_name(val) \
> + __print_symbolic(val, inet_protocol_names)
> +
> +#define show_tcp_state_name(val) \
> + __print_symbolic(val, tcp_state_names)
>
> TRACE_EVENT(sock_rcvqueue_full,
>
> @@ -63,6 +105,70 @@
> __entry->rmem_alloc)
> );
>
> +TRACE_EVENT(sock_set_state,
> +
> + TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
> +
> + TP_ARGS(sk, oldstate, newstate),
> +
> + TP_STRUCT__entry(
> + __field(const void *, skaddr)
> + __field(int, oldstate)
> + __field(int, newstate)
> + __field(__u16, sport)
> + __field(__u16, dport)
> + __field(__u8, protocol)
> + __array(__u8, saddr, 4)
> + __array(__u8, daddr, 4)
> + __array(__u8, saddr_v6, 16)
> + __array(__u8, daddr_v6, 16)
> + ),
> +
> + TP_fast_assign(
> + struct inet_sock *inet = inet_sk(sk);
> + struct in6_addr *pin6;
> + __be32 *p32;
> +
> + __entry->skaddr = sk;
> + __entry->oldstate = oldstate;
> + __entry->newstate = newstate;
> +
> + __entry->protocol = sk->sk_protocol;
> + __entry->sport = ntohs(inet->inet_sport);
> + __entry->dport = ntohs(inet->inet_dport);
> +
> + p32 = (__be32 *) __entry->saddr;
> + *p32 = inet->inet_saddr;
> +
> + p32 = (__be32 *) __entry->daddr;
> + *p32 = inet->inet_daddr;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> + if (sk->sk_family == AF_INET6) {
> + pin6 = (struct in6_addr *)__entry->saddr_v6;
> + *pin6 = sk->sk_v6_rcv_saddr;
> + pin6 = (struct in6_addr *)__entry->daddr_v6;
> + *pin6 = sk->sk_v6_daddr;
> + } else
> +#endif
> + {
> + pin6 = (struct in6_addr *)__entry->saddr_v6;
> + ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
> + pin6 = (struct in6_addr *)__entry->daddr_v6;
> + ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
> + }
> + ),
What if sk_family is not AF_INET or AF_INET6? We are probably OK not
checking it for tcp, but we should definitely consider this for all
sockets in general.
Thanks,
Song
> + TP_printk("protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4"
> + "saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
> + show_inet_protocol_name(__entry->protocol),
> + __entry->sport, __entry->dport,
> + __entry->saddr, __entry->daddr,
> + __entry->saddr_v6, __entry->daddr_v6,
> + show_tcp_state_name(__entry->oldstate),
> + show_tcp_state_name(__entry->newstate))
> +);
> +
> #endif /* _TRACE_SOCK_H */
>
> /* This part must be outside protection */
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 40240ac..7399399 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -9,37 +9,6 @@
> #include <linux/tracepoint.h>
> #include <net/ipv6.h>
>
> -#define tcp_state_names \
> - EM(TCP_ESTABLISHED) \
> - EM(TCP_SYN_SENT) \
> - EM(TCP_SYN_RECV) \
> - EM(TCP_FIN_WAIT1) \
> - EM(TCP_FIN_WAIT2) \
> - EM(TCP_TIME_WAIT) \
> - EM(TCP_CLOSE) \
> - EM(TCP_CLOSE_WAIT) \
> - EM(TCP_LAST_ACK) \
> - EM(TCP_LISTEN) \
> - EM(TCP_CLOSING) \
> - EMe(TCP_NEW_SYN_RECV) \
> -
> -/* enums need to be exported to user space */
> -#undef EM
> -#undef EMe
> -#define EM(a) TRACE_DEFINE_ENUM(a);
> -#define EMe(a) TRACE_DEFINE_ENUM(a);
> -
> -tcp_state_names
> -
> -#undef EM
> -#undef EMe
> -#define EM(a) tcp_state_name(a),
> -#define EMe(a) tcp_state_name(a)
> -
> -#define tcp_state_name(state) { state, #state }
> -#define show_tcp_state_name(val) \
> - __print_symbolic(val, tcp_state_names)
> -
> /*
> * tcp event with arguments sk and skb
> *
> @@ -192,66 +161,6 @@
> TP_ARGS(sk)
> );
>
> -TRACE_EVENT(tcp_set_state,
> -
> - TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
> -
> - TP_ARGS(sk, oldstate, newstate),
> -
> - TP_STRUCT__entry(
> - __field(const void *, skaddr)
> - __field(int, oldstate)
> - __field(int, newstate)
> - __field(__u16, sport)
> - __field(__u16, dport)
> - __array(__u8, saddr, 4)
> - __array(__u8, daddr, 4)
> - __array(__u8, saddr_v6, 16)
> - __array(__u8, daddr_v6, 16)
> - ),
> -
> - TP_fast_assign(
> - struct inet_sock *inet = inet_sk(sk);
> - struct in6_addr *pin6;
> - __be32 *p32;
> -
> - __entry->skaddr = sk;
> - __entry->oldstate = oldstate;
> - __entry->newstate = newstate;
> -
> - __entry->sport = ntohs(inet->inet_sport);
> - __entry->dport = ntohs(inet->inet_dport);
> -
> - p32 = (__be32 *) __entry->saddr;
> - *p32 = inet->inet_saddr;
> -
> - p32 = (__be32 *) __entry->daddr;
> - *p32 = inet->inet_daddr;
> -
> -#if IS_ENABLED(CONFIG_IPV6)
> - if (sk->sk_family == AF_INET6) {
> - pin6 = (struct in6_addr *)__entry->saddr_v6;
> - *pin6 = sk->sk_v6_rcv_saddr;
> - pin6 = (struct in6_addr *)__entry->daddr_v6;
> - *pin6 = sk->sk_v6_daddr;
> - } else
> -#endif
> - {
> - pin6 = (struct in6_addr *)__entry->saddr_v6;
> - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
> - pin6 = (struct in6_addr *)__entry->daddr_v6;
> - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
> - }
> - ),
> -
> - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
> - __entry->sport, __entry->dport,
> - __entry->saddr, __entry->daddr,
> - __entry->saddr_v6, __entry->daddr_v6,
> - show_tcp_state_name(__entry->oldstate),
> - show_tcp_state_name(__entry->newstate))
> -);
> -
> TRACE_EVENT(tcp_retransmit_synack,
>
> TP_PROTO(const struct sock *sk, const struct request_sock *req),
> diff --git a/net/core/sock.c b/net/core/sock.c
> index c0b5b2f..717f7f6 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2859,6 +2859,19 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
> }
> EXPORT_SYMBOL(sock_get_timestampns);
>
> +void sk_state_store(struct sock *sk, int state)
> +{
> + trace_sock_set_state(sk, sk->sk_state, state);
> + smp_store_release(&sk->sk_state, state);
> +}
> +
> +void sk_set_state(struct sock *sk, int state)
> +{
> + trace_sock_set_state(sk, sk->sk_state, state);
> + sk->sk_state = state;
> +}
> +EXPORT_SYMBOL(sk_set_state);
> +
> void sock_enable_timestamp(struct sock *sk, int flag)
> {
> if (!sock_flag(sk, flag)) {
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 4ca46dc..001f7b0 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
> if (newsk) {
> struct inet_connection_sock *newicsk = inet_csk(newsk);
>
> - newsk->sk_state = TCP_SYN_RECV;
> + sk_set_state(newsk, TCP_SYN_RECV);
> newicsk->icsk_bind_hash = NULL;
>
> inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
> @@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> return 0;
> }
>
> - sk->sk_state = TCP_CLOSE;
> + sk_set_state(sk, TCP_CLOSE);
> return err;
> }
> EXPORT_SYMBOL_GPL(inet_csk_listen_start);
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index f6f5810..5973693 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
> sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
> } else {
> percpu_counter_inc(sk->sk_prot->orphan_count);
> - sk->sk_state = TCP_CLOSE;
> + sk_set_state(sk, TCP_CLOSE);
> sock_set_flag(sk, SOCK_DEAD);
> inet_csk_destroy_sock(sk);
> }
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index c470fec..df6da92 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -283,8 +283,6 @@
> #include <asm/ioctls.h>
> #include <net/busy_poll.h>
>
> -#include <trace/events/tcp.h>
> -
> struct percpu_counter tcp_orphan_count;
> EXPORT_SYMBOL_GPL(tcp_orphan_count);
>
> @@ -2040,8 +2038,6 @@ void tcp_set_state(struct sock *sk, int state)
> {
> int oldstate = sk->sk_state;
>
> - trace_tcp_set_state(sk, oldstate, state);
> -
> switch (state) {
> case TCP_ESTABLISHED:
> if (oldstate != TCP_ESTABLISHED)
> --
> 1.8.3.1
>
2017-12-16 6:47 GMT+08:00 Song Liu <[email protected]>:
>
>> On Dec 15, 2017, at 9:56 AM, Yafang Shao <[email protected]> wrote:
>>
>> As sk_state is a common field for struct sock, so the state
>> transition should not be a TCP specific feature.
>> So I rename tcp_set_state tracepoint to sock_set_state tracepoint with
>> some minor changes and move it into file trace/events/sock.h.
>>
>> Two helpers are introduced to trace sk_state transition
>> - void sk_state_store(struct sock *sk, int state);
>> - void sk_set_state(struct sock *sk, int state);
>> As trace header should not be included in other header files,
>> so they are defined in sock.c.
>>
>> The protocol such as SCTP maybe compiled as a ko, hence export
>> sk_set_state().
>>
>> Signed-off-by: Yafang Shao <[email protected]>
>> ---
>> include/net/sock.h | 15 +-----
>> include/trace/events/sock.h | 106 ++++++++++++++++++++++++++++++++++++++++
>> include/trace/events/tcp.h | 91 ----------------------------------
>> net/core/sock.c | 13 +++++
>> net/ipv4/inet_connection_sock.c | 4 +-
>> net/ipv4/inet_hashtables.c | 2 +-
>> net/ipv4/tcp.c | 4 --
>> 7 files changed, 124 insertions(+), 111 deletions(-)
>>
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index 9a90472..988ce82 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -2344,19 +2344,8 @@ static inline int sk_state_load(const struct sock *sk)
>> return smp_load_acquire(&sk->sk_state);
>> }
>>
>> -/**
>> - * sk_state_store - update sk->sk_state
>> - * @sk: socket pointer
>> - * @newstate: new state
>> - *
>> - * Paired with sk_state_load(). Should be used in contexts where
>> - * state change might impact lockless readers.
>> - */
>> -static inline void sk_state_store(struct sock *sk, int newstate)
>> -{
>> - smp_store_release(&sk->sk_state, newstate);
>> -}
>> -
>> +void sk_state_store(struct sock *sk, int newstate);
>> +void sk_set_state(struct sock *sk, int state);
>> void sock_enable_timestamp(struct sock *sk, int flag);
>> int sock_get_timestamp(struct sock *, struct timeval __user *);
>> int sock_get_timestampns(struct sock *, struct timespec __user *);
>> diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
>> index ec4dade..61977e5 100644
>> --- a/include/trace/events/sock.h
>> +++ b/include/trace/events/sock.h
>> @@ -6,7 +6,49 @@
>> #define _TRACE_SOCK_H
>>
>> #include <net/sock.h>
>> +#include <net/ipv6.h>
>> #include <linux/tracepoint.h>
>> +#include <linux/ipv6.h>
>> +#include <linux/tcp.h>
>> +
>> +#define inet_protocol_names \
>> + EM(IPPROTO_TCP) \
>> + EM(IPPROTO_DCCP) \
>> + EMe(IPPROTO_SCTP)
>> +
>> +#define tcp_state_names \
>> + EM(TCP_ESTABLISHED) \
>> + EM(TCP_SYN_SENT) \
>> + EM(TCP_SYN_RECV) \
>> + EM(TCP_FIN_WAIT1) \
>> + EM(TCP_FIN_WAIT2) \
>> + EM(TCP_TIME_WAIT) \
>> + EM(TCP_CLOSE) \
>> + EM(TCP_CLOSE_WAIT) \
>> + EM(TCP_LAST_ACK) \
>> + EM(TCP_LISTEN) \
>> + EM(TCP_CLOSING) \
>> + EMe(TCP_NEW_SYN_RECV)
>
> Please keep these backslashes aligned.
>
OK.
They look misaligned here because I aligned them with tabs in my original code.
>> +/* enums need to be exported to user space */
>> +#undef EM
>> +#undef EMe
>> +#define EM(a) TRACE_DEFINE_ENUM(a);
>> +#define EMe(a) TRACE_DEFINE_ENUM(a);
>> +
>> +inet_protocol_names
>> +tcp_state_names
>> +
>> +#undef EM
>> +#undef EMe
>> +#define EM(a) { a, #a },
>> +#define EMe(a) { a, #a }
>> +
>> +#define show_inet_protocol_name(val) \
>> + __print_symbolic(val, inet_protocol_names)
>> +
>> +#define show_tcp_state_name(val) \
>> + __print_symbolic(val, tcp_state_names)
>>
>> TRACE_EVENT(sock_rcvqueue_full,
>>
>> @@ -63,6 +105,70 @@
>> __entry->rmem_alloc)
>> );
>>
>> +TRACE_EVENT(sock_set_state,
>> +
>> + TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
>> +
>> + TP_ARGS(sk, oldstate, newstate),
>> +
>> + TP_STRUCT__entry(
>> + __field(const void *, skaddr)
>> + __field(int, oldstate)
>> + __field(int, newstate)
>> + __field(__u16, sport)
>> + __field(__u16, dport)
>> + __field(__u8, protocol)
>> + __array(__u8, saddr, 4)
>> + __array(__u8, daddr, 4)
>> + __array(__u8, saddr_v6, 16)
>> + __array(__u8, daddr_v6, 16)
>> + ),
>> +
>> + TP_fast_assign(
>> + struct inet_sock *inet = inet_sk(sk);
>> + struct in6_addr *pin6;
>> + __be32 *p32;
>> +
>> + __entry->skaddr = sk;
>> + __entry->oldstate = oldstate;
>> + __entry->newstate = newstate;
>> +
>> + __entry->protocol = sk->sk_protocol;
>> + __entry->sport = ntohs(inet->inet_sport);
>> + __entry->dport = ntohs(inet->inet_dport);
>> +
>> + p32 = (__be32 *) __entry->saddr;
>> + *p32 = inet->inet_saddr;
>> +
>> + p32 = (__be32 *) __entry->daddr;
>> + *p32 = inet->inet_daddr;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + if (sk->sk_family == AF_INET6) {
>> + pin6 = (struct in6_addr *)__entry->saddr_v6;
>> + *pin6 = sk->sk_v6_rcv_saddr;
>> + pin6 = (struct in6_addr *)__entry->daddr_v6;
>> + *pin6 = sk->sk_v6_daddr;
>> + } else
>> +#endif
>> + {
>> + pin6 = (struct in6_addr *)__entry->saddr_v6;
>> + ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
>> + pin6 = (struct in6_addr *)__entry->daddr_v6;
>> + ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
>> + }
>> + ),
>
> What if sk_family is not AF_INET or AF_INET6? We are probably OK not
> checking it for tcp, but we should definitely consider this for all
> sockets in general.
>
This is the question I had been thinking about.
Does it make sense to trace as many protocols as possible?
Maybe not.
Take IPPROTO_UDP for example: it only has two states, TCP_CLOSE and
TCP_ESTABLISHED.
It is probably useless to trace UDP sk_state transitions.
So in this patch I only trace TCP/DCCP/SCTP state transitions; these
protocols have multiple states, and their state transitions can help us
analyze problems.
All three of these protocols are AF_INET/AF_INET6.
IMO, we don't need to trace protocols which are not
AF_INET/AF_INET6, so we don't need to complicate the code and
output useless information.
Thanks
Yafang