When using `tcp_drop(struct sock *sk, struct sk_buff *skb)` we cannot
tell why an `skb` has to be dropped. To solve this, I changed the
function to `tcp_drop(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason)`
so that each drop records where it came from, and the reason for the
drop can be inferred from that call site.
This change is mainly based on the suggestions of `Yonghong Song` and
`brendangregg` in
https://github.com/iovisor/bcc/issues/3533:
"It is worthwhile to mention the context/why we want to this
tracepoint with bcc issue https://github.com/iovisor/bcc/issues/3533.
Mainly two reasons: (1). tcp_drop is a tiny function which
may easily get inlined, a tracepoint is more stable, and (2).
tcp_drop does not provide enough information on why it is dropped.
" by Yonghong Song
Signed-off-by: Zhongya Yan <[email protected]>
---
include/net/tcp.h | 11 ++++++++
include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_input.c | 34 +++++++++++++++--------
3 files changed, 89 insertions(+), 12 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 784d5c3ef1c5..dd8cd8d6f2f1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -254,6 +254,17 @@ extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;
+enum tcp_drop_reason {
+ TCP_OFO_QUEUE = 1,
+ TCP_DATA_QUEUE_OFO = 2,
+ TCP_DATA_QUEUE = 3,
+ TCP_PRUNE_OFO_QUEUE = 4,
+ TCP_VALIDATE_INCOMING = 5,
+ TCP_RCV_ESTABLISHED = 6,
+ TCP_RCV_SYNSENT_STATE_PROCESS = 7,
+ TCP_RCV_STATE_PROCESS = 8
+};
+
/* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 521059d8dc0a..a0d3d31eb591 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -371,6 +371,62 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
TP_ARGS(skb)
);
+TRACE_EVENT(tcp_drop,
+ TP_PROTO(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason),
+
+ TP_ARGS(sk, skb, reason),
+
+ TP_STRUCT__entry(
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __field(__u32, mark)
+ __field(__u16, data_len)
+ __field(__u32, snd_nxt)
+ __field(__u32, snd_una)
+ __field(__u32, snd_cwnd)
+ __field(__u32, ssthresh)
+ __field(__u32, snd_wnd)
+ __field(__u32, srtt)
+ __field(__u32, rcv_wnd)
+ __field(__u64, sock_cookie)
+ __field(__u32, reason)
+ ),
+
+ TP_fast_assign(
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+ __entry->mark = skb->mark;
+
+ __entry->data_len = skb->len - __tcp_hdrlen(th);
+ __entry->snd_nxt = tp->snd_nxt;
+ __entry->snd_una = tp->snd_una;
+ __entry->snd_cwnd = tp->snd_cwnd;
+ __entry->snd_wnd = tp->snd_wnd;
+ __entry->rcv_wnd = tp->rcv_wnd;
+ __entry->ssthresh = tcp_current_ssthresh(sk);
+ __entry->srtt = tp->srtt_us >> 3;
+ __entry->sock_cookie = sock_gen_cookie(sk);
+ __entry->reason = reason;
+ ),
+
+ TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx reason=%d",
+ __entry->saddr, __entry->daddr, __entry->mark,
+ __entry->data_len, __entry->snd_nxt, __entry->snd_una,
+ __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
+ __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->reason)
+);
+
#endif /* _TRACE_TCP_H */
/* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 149ceb5c94ff..83e31661876b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4643,12 +4643,22 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+static void __tcp_drop(struct sock *sk,
+ struct sk_buff *skb)
{
sk_drops_add(sk, skb);
__kfree_skb(skb);
}
+/* tcp_drop whit reason,for epbf trace
+ */
+static void tcp_drop(struct sock *sk, struct sk_buff *skb,
+ enum tcp_drop_reason reason)
+{
+ trace_tcp_drop(sk, skb, reason);
+ __tcp_drop(sk, skb);
+}
+
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
@@ -4676,7 +4686,7 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_OFO_QUEUE);
continue;
}
@@ -4732,7 +4742,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
sk->sk_data_ready(sk);
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_DATA_QUEUE_OFO);
return;
}
@@ -4795,7 +4805,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_DATA_QUEUE_OFO);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4814,7 +4824,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop(sk, skb1, TCP_DATA_QUEUE_OFO);
goto merge_right;
}
} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4842,7 +4852,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop(sk, skb1, TCP_DATA_QUEUE_OFO);
}
/* If there is no skb after us, we are the last_skb ! */
if (!skb1)
@@ -5019,7 +5029,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_schedule_ack(sk);
drop:
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_DATA_QUEUE);
return;
}
@@ -5276,7 +5286,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_to_skb(node));
+ tcp_drop(sk, rb_to_skb(node), TCP_PRUNE_OFO_QUEUE);
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5701,7 +5711,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
return true;
discard:
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_VALIDATE_INCOMING);
return false;
}
@@ -5905,7 +5915,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
discard:
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_RCV_ESTABLISHED);
}
EXPORT_SYMBOL(tcp_rcv_established);
@@ -6196,7 +6206,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
TCP_DELACK_MAX, TCP_RTO_MAX);
discard:
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_RCV_SYNSENT_STATE_PROCESS);
return 0;
} else {
tcp_send_ack(sk);
@@ -6568,7 +6578,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!queued) {
discard:
- tcp_drop(sk, skb);
+ tcp_drop(sk, skb, TCP_RCV_STATE_PROCESS);
}
return 0;
}
--
2.25.1
On Tue, 24 Aug 2021 05:51:40 -0700 Zhongya Yan wrote:
> +enum tcp_drop_reason {
> + TCP_OFO_QUEUE = 1,
> + TCP_DATA_QUEUE_OFO = 2,
> + TCP_DATA_QUEUE = 3,
> + TCP_PRUNE_OFO_QUEUE = 4,
> + TCP_VALIDATE_INCOMING = 5,
> + TCP_RCV_ESTABLISHED = 6,
> + TCP_RCV_SYNSENT_STATE_PROCESS = 7,
> + TCP_RCV_STATE_PROCESS = 8
> +};
This is basically tracking the caller; each caller may have multiple
reasons for dropping. Is tracking the caller sufficient? Should we at
least make this a bitmask so we can set multiple bits (caller and a more
precise reason)? Or are we going to add another field in that case?
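
For illustration, one possible encoding (the names below are
hypothetical, not taken from the patch) reserves the low bits for the
calling function and the high bits for a more precise cause, so both
can be set in a single value:

enum tcp_drop_reason {
	/* caller (low 16 bits) */
	TCP_DROP_OFO_QUEUE		= 1 << 0,
	TCP_DROP_DATA_QUEUE		= 1 << 1,
	/* more precise cause (high 16 bits) */
	TCP_DROP_RMEM_SCHEDULE_FAIL	= 1 << 16,
	TCP_DROP_DUP_SEGMENT		= 1 << 17,
};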
> -static void tcp_drop(struct sock *sk, struct sk_buff *skb)
> +static void __tcp_drop(struct sock *sk,
> + struct sk_buff *skb)
> {
> sk_drops_add(sk, skb);
> __kfree_skb(skb);
> }
Why keep this function if there is only one caller?
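
One possible follow-up, sketched here rather than taken from the patch,
is to fold the helper into tcp_drop() itself:

static void tcp_drop(struct sock *sk, struct sk_buff *skb,
		     enum tcp_drop_reason reason)
{
	trace_tcp_drop(sk, skb, reason);
	sk_drops_add(sk, skb);
	__kfree_skb(skb);
}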
> +/* tcp_drop whit reason,for epbf trace
> + */
This comment is (a) misspelled, (b) doesn't add much value.
> +static void tcp_drop(struct sock *sk, struct sk_buff *skb,
> + enum tcp_drop_reason reason)
> +{
> + trace_tcp_drop(sk, skb, reason);
> + __tcp_drop(sk, skb);
> +}
On Tue, Aug 24, 2021 at 5:52 AM Zhongya Yan <[email protected]> wrote:
>
> When using `tcp_drop(struct sock *sk, struct sk_buff *skb)` we can
> not tell why we need to delete `skb`. To solve this problem I updated the
> method `tcp_drop(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason)`
> to include the source of the deletion when it is done, so you can
> get an idea of the reason for the deletion based on the source.
>
> The current purpose is mainly derived from the suggestions
> of `Yonghong Song` and `brendangregg`:
>
> https://github.com/iovisor/bcc/issues/3533.
>
> "It is worthwhile to mention the context/why we want to this
> tracepoint with bcc issue https://github.com/iovisor/bcc/issues/3533.
> Mainly two reasons: (1). tcp_drop is a tiny function which
> may easily get inlined, a tracepoint is more stable, and (2).
> tcp_drop does not provide enough information on why it is dropped.
> " by Yonghong Song
>
> Signed-off-by: Zhongya Yan <[email protected]>
> ---
That is a good start, but really if people want to use this
tracepoint, they will hit a wall soon.
> return true;
>
> discard:
There are many "goto discard;" paths in this function, so using a common
value (TCP_VALIDATE_INCOMING) is not helpful; see the sketch after this hunk.
> - tcp_drop(sk, skb);
> + tcp_drop(sk, skb, TCP_VALIDATE_INCOMING);
> return false;
> }
>
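A sketch of one alternative (the extra TCP_VALIDATE_INCOMING_* values
are hypothetical, and the checks are abridged): record a per-path reason
before jumping to the shared label, so the common exit still reports why
the segment was dropped.

	enum tcp_drop_reason reason = TCP_VALIDATE_INCOMING;

	if (tcp_paws_discard(sk, skb)) {
		reason = TCP_VALIDATE_INCOMING_PAWS;		/* hypothetical */
		goto discard;
	}

	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		reason = TCP_VALIDATE_INCOMING_OUT_OF_WINDOW;	/* hypothetical */
		goto discard;
	}

	return true;

discard:
	tcp_drop(sk, skb, reason);
	return false;
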
> @@ -5905,7 +5915,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
> TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
>
> discard:
> - tcp_drop(sk, skb);
Same here.
> + tcp_drop(sk, skb, TCP_RCV_ESTABLISHED);
> }
> EXPORT_SYMBOL(tcp_rcv_established);
>
> @@ -6196,7 +6206,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
> TCP_DELACK_MAX, TCP_RTO_MAX);
>
> discard:
> - tcp_drop(sk, skb);
> + tcp_drop(sk, skb, TCP_RCV_SYNSENT_STATE_PROCESS);
> return 0;
> } else {
> tcp_send_ack(sk);
> @@ -6568,7 +6578,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
>
> if (!queued) {
> discard:
same here.
> - tcp_drop(sk, skb);
> + tcp_drop(sk, skb, TCP_RCV_STATE_PROCESS);
> }
> return 0;
> }
> --
> 2.25.1
>
On Tue, 24 Aug 2021 05:51:40 -0700
Zhongya Yan <[email protected]> wrote:
> When using `tcp_drop(struct sock *sk, struct sk_buff *skb)` we can
> not tell why we need to delete `skb`. To solve this problem I updated the
> method `tcp_drop(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason)`
> to include the source of the deletion when it is done, so you can
> get an idea of the reason for the deletion based on the source.
>
> The current purpose is mainly derived from the suggestions
> of `Yonghong Song` and `brendangregg`:
>
> https://github.com/iovisor/bcc/issues/3533.
>
> "It is worthwhile to mention the context/why we want to this
> tracepoint with bcc issue https://github.com/iovisor/bcc/issues/3533.
> Mainly two reasons: (1). tcp_drop is a tiny function which
> may easily get inlined, a tracepoint is more stable, and (2).
> tcp_drop does not provide enough information on why it is dropped.
> " by Yonghong Song
>
> Signed-off-by: Zhongya Yan <[email protected]>
> ---
> include/net/tcp.h | 11 ++++++++
> include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++
> net/ipv4/tcp_input.c | 34 +++++++++++++++--------
> 3 files changed, 89 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 784d5c3ef1c5..dd8cd8d6f2f1 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -254,6 +254,17 @@ extern atomic_long_t tcp_memory_allocated;
> extern struct percpu_counter tcp_sockets_allocated;
> extern unsigned long tcp_memory_pressure;
>
> +enum tcp_drop_reason {
> + TCP_OFO_QUEUE = 1,
> + TCP_DATA_QUEUE_OFO = 2,
> + TCP_DATA_QUEUE = 3,
> + TCP_PRUNE_OFO_QUEUE = 4,
> + TCP_VALIDATE_INCOMING = 5,
> + TCP_RCV_ESTABLISHED = 6,
> + TCP_RCV_SYNSENT_STATE_PROCESS = 7,
> + TCP_RCV_STATE_PROCESS = 8
As enums increase by one, you could save yourself the burden of
updating the numbers and just have:
enum tcp_drop_reason {
TCP_OFO_QUEUE = 1,
TCP_DATA_QUEUE_OFO,
TCP_DATA_QUEUE,
TCP_PRUNE_OFO_QUEUE,
TCP_VALIDATE_INCOMING,
TCP_RCV_ESTABLISHED,
TCP_RCV_SYNSENT_STATE_PROCESS,
TCP_RCV_STATE_PROCESS
};
Which would do the same.
> +};
> +
> /* optimized version of sk_under_memory_pressure() for TCP sockets */
> static inline bool tcp_under_memory_pressure(const struct sock *sk)
> {
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 521059d8dc0a..a0d3d31eb591 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -371,6 +371,62 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
> TP_ARGS(skb)
> );
>
If you would like to see the English translation of what these
"reasons" are and not have to remember which number is which, you can
do the following:
#define TCP_DROP_REASON \
EM(TCP_OFO_QUEUE, ofo_queue) \
EM(TCP_DATA_QUEUE_OFO, data_queue_ofo) \
EM(TCP_DATA_QUEUE, data_queue) \
EM(TCP_PRUNE_OFO_QUEUE, prune_ofo_queue) \
EM(TCP_VALIDATE_INCOMING, validate_incoming) \
EM(TCP_RCV_ESTABLISHED, rcv_established) \
EM(TCP_RCV_SYNSENT_STATE_PROCESS, rcv_synsent_state_process) \
EMe(TCP_RCV_STATE_PROCESS, rcv_state_process)
#undef EM
#undef EMe
#define EM(a,b) { a, #b },
#define EMe(a, b) { a, #b }
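
For completeness, the usual idiom also runs the same list through
TRACE_DEFINE_ENUM() (sketch below), placed before the EM()/EMe()
redefinition above, so that user-space tools can resolve the enum
values in the print format:

#undef EM
#undef EMe
#define EM(a, b)	TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

TCP_DROP_REASON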
> +TRACE_EVENT(tcp_drop,
> + TP_PROTO(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason),
> +
> + TP_ARGS(sk, skb, reason),
> +
> + TP_STRUCT__entry(
> + __array(__u8, saddr, sizeof(struct sockaddr_in6))
> + __array(__u8, daddr, sizeof(struct sockaddr_in6))
> + __field(__u16, sport)
> + __field(__u16, dport)
> + __field(__u32, mark)
> + __field(__u16, data_len)
> + __field(__u32, snd_nxt)
> + __field(__u32, snd_una)
> + __field(__u32, snd_cwnd)
> + __field(__u32, ssthresh)
> + __field(__u32, snd_wnd)
> + __field(__u32, srtt)
> + __field(__u32, rcv_wnd)
> + __field(__u64, sock_cookie)
> + __field(__u32, reason)
> + ),
> +
> + TP_fast_assign(
> + const struct tcphdr *th = (const struct tcphdr *)skb->data;
> + const struct inet_sock *inet = inet_sk(sk);
> + const struct tcp_sock *tp = tcp_sk(sk);
> +
> + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> +
> + TP_STORE_ADDR_PORTS(__entry, inet, sk);
> +
> + __entry->sport = ntohs(inet->inet_sport);
> + __entry->dport = ntohs(inet->inet_dport);
> + __entry->mark = skb->mark;
> +
> + __entry->data_len = skb->len - __tcp_hdrlen(th);
> + __entry->snd_nxt = tp->snd_nxt;
> + __entry->snd_una = tp->snd_una;
> + __entry->snd_cwnd = tp->snd_cwnd;
> + __entry->snd_wnd = tp->snd_wnd;
> + __entry->rcv_wnd = tp->rcv_wnd;
> + __entry->ssthresh = tcp_current_ssthresh(sk);
> + __entry->srtt = tp->srtt_us >> 3;
> + __entry->sock_cookie = sock_gen_cookie(sk);
> + __entry->reason = reason;
> + ),
> +
> + TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx reason=%d",
Then above you can have: "reason=%s"
> + __entry->saddr, __entry->daddr, __entry->mark,
> + __entry->data_len, __entry->snd_nxt, __entry->snd_una,
> + __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
> + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->reason)
And here:
__print_symbolic(__entry->reason, TCP_DROP_REASON)
-- Steve
> +);
> +
> #endif /* _TRACE_TCP_H */
>
> /* This part must be outside protection */
>
On Tue, Aug 24, 2021 at 5:08 PM Zhongya Yan <[email protected]> wrote:
>
> Cool, thanks!
> i will do it
Since these drops are hardly a hot path, why not simply use a string?
An enum will not really help grep games.
tcp_drop(sk, skb, "csum error");
The const char * argument would not be evaluated unless the tracepoint
is active, but who cares.
(Speaking of csum errors, they are not currently calling tcp_drop(),
but we could unify all packet drops to use this tracepoint,
and get rid of ad hoc ones, like trace_tcp_bad_csum().)
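
Roughly, the string-based variant suggested here could look like the
following (sketch only; the rest of the tracepoint stays as in the
posted patch):

static void tcp_drop(struct sock *sk, struct sk_buff *skb,
		     const char *reason)
{
	trace_tcp_drop(sk, skb, reason);
	sk_drops_add(sk, skb);
	__kfree_skb(skb);
}

with the tracepoint storing the reason as a dynamic string:

	TP_STRUCT__entry(
		__string(reason, reason)
		/* other fields unchanged */
	),
	TP_fast_assign(
		__assign_str(reason, reason);
		/* other assignments unchanged */
	),
	TP_printk("reason=%s", __get_str(reason))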
On Wed, 25 Aug 2021 08:39:40 -0700
Eric Dumazet <[email protected]> wrote:
> Since these drops are hardly hot path, why not simply use a string ?
> An ENUM will not really help grep games.
I'm more concerned with ring buffer space than with hot paths. The ring
buffer is limited in size, and the bigger the events, the fewer of them
fit.
The grep games shouldn't be too bad, since grep will find the place that
maps the names to the enums, and then you just search for the enums.
-- Steve