2022-02-03 17:51:38

by Dongli Zhang

[permalink] [raw]
Subject: [PATCH RFC 1/4] net: skb: use line number to trace dropped skb

Sometimes the kernel may not directly call kfree_skb() to drop the sk_buff.
Instead, it "goto drop" and call kfree_skb() at 'drop'. This make it
difficult to track the reason that the sk_buff is dropped.

The commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") has
introduced the kfree_skb_reason() to help track the reason. However, we may
need to define many reasons for each driver/subsystem.

To avoid introducing so many new reasons, this is to use line number
("__LINE__") to trace where the sk_buff is dropped. As a result, the reason
will be generated automatically.

Cc: Joao Martins <[email protected]>
Cc: Joe Jin <[email protected]>
Signed-off-by: Dongli Zhang <[email protected]>
---
include/linux/skbuff.h | 21 ++++-----------------
include/trace/events/skb.h | 35 ++++++-----------------------------
net/core/dev.c | 2 +-
net/core/skbuff.c | 9 ++++-----
net/ipv4/tcp_ipv4.c | 14 +++++++-------
net/ipv4/udp.c | 14 +++++++-------
6 files changed, 29 insertions(+), 66 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8a636e678902..471268a4a497 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -307,21 +307,8 @@ struct sk_buff_head {

struct sk_buff;

-/* The reason of skb drop, which is used in kfree_skb_reason().
- * en...maybe they should be splited by group?
- *
- * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
- * used to translate the reason to string.
- */
-enum skb_drop_reason {
- SKB_DROP_REASON_NOT_SPECIFIED,
- SKB_DROP_REASON_NO_SOCKET,
- SKB_DROP_REASON_PKT_TOO_SMALL,
- SKB_DROP_REASON_TCP_CSUM,
- SKB_DROP_REASON_SOCKET_FILTER,
- SKB_DROP_REASON_UDP_CSUM,
- SKB_DROP_REASON_MAX,
-};
+#define SKB_DROP_LINE_NONE 0
+#define SKB_DROP_LINE __LINE__

/* To allow 64K frame to be packed as single skb without frag_list we
* require 64K/PAGE_SIZE pages plus 1 additional page to allow for
@@ -1103,7 +1090,7 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}

-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void kfree_skb_reason(struct sk_buff *skb, unsigned int line);

/**
* kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
@@ -1111,7 +1098,7 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
*/
static inline void kfree_skb(struct sk_buff *skb)
{
- kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+ kfree_skb_reason(skb, SKB_DROP_LINE_NONE);
}

void skb_release_head_state(struct sk_buff *skb);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index a8a64b97504d..45d1a1757ff1 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -9,56 +9,33 @@
#include <linux/netdevice.h>
#include <linux/tracepoint.h>

-#define TRACE_SKB_DROP_REASON \
- EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
- EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \
- EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \
- EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
- EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \
- EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
- EMe(SKB_DROP_REASON_MAX, MAX)
-
-#undef EM
-#undef EMe
-
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define EMe(a, b) TRACE_DEFINE_ENUM(a);
-
-TRACE_SKB_DROP_REASON
-
-#undef EM
-#undef EMe
-#define EM(a, b) { a, #b },
-#define EMe(a, b) { a, #b }
-
/*
* Tracepoint for free an sk_buff:
*/
TRACE_EVENT(kfree_skb,

TP_PROTO(struct sk_buff *skb, void *location,
- enum skb_drop_reason reason),
+ unsigned int line),

- TP_ARGS(skb, location, reason),
+ TP_ARGS(skb, location, line),

TP_STRUCT__entry(
__field(void *, skbaddr)
__field(void *, location)
__field(unsigned short, protocol)
- __field(enum skb_drop_reason, reason)
+ __field(unsigned int, line)
),

TP_fast_assign(
__entry->skbaddr = skb;
__entry->location = location;
__entry->protocol = ntohs(skb->protocol);
- __entry->reason = reason;
+ __entry->line = line;
),

- TP_printk("skbaddr=%p protocol=%u location=%p reason: %s",
+ TP_printk("skbaddr=%p protocol=%u location=%p line: %u",
__entry->skbaddr, __entry->protocol, __entry->location,
- __print_symbolic(__entry->reason,
- TRACE_SKB_DROP_REASON))
+ __entry->line)
);

TRACE_EVENT(consume_skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index 1baab07820f6..448f390d35d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4900,7 +4900,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
trace_consume_skb(skb);
else
trace_kfree_skb(skb, net_tx_action,
- SKB_DROP_REASON_NOT_SPECIFIED);
+ SKB_DROP_LINE);

if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0118f0afaa4f..8572c3bce264 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -761,18 +761,17 @@ EXPORT_SYMBOL(__kfree_skb);
/**
* kfree_skb_reason - free an sk_buff with special reason
* @skb: buffer to free
- * @reason: reason why this skb is dropped
+ * @line: the line where this skb is dropped
*
* Drop a reference to the buffer and free it if the usage count has
- * hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
- * tracepoint.
+ * hit zero. Meanwhile, pass the drop line to 'kfree_skb' tracepoint.
*/
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void kfree_skb_reason(struct sk_buff *skb, unsigned int line)
{
if (!skb_unref(skb))
return;

- trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ trace_kfree_skb(skb, __builtin_return_address(0), line);
__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb_reason);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fec656f5a39e..f23af94d1186 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1971,10 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
- int drop_reason;
+ unsigned int drop_line;
int ret;

- drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ drop_line = SKB_DROP_LINE_NONE;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;

@@ -1987,7 +1987,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;

if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
- drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ drop_line = SKB_DROP_LINE;
goto bad_packet;
}
if (!pskb_may_pull(skb, th->doff * 4))
@@ -2095,7 +2095,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
nf_reset_ct(skb);

if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ drop_line = SKB_DROP_LINE;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;
@@ -2130,7 +2130,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
return ret;

no_tcp_socket:
- drop_reason = SKB_DROP_REASON_NO_SOCKET;
+ drop_line = SKB_DROP_LINE;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

@@ -2138,7 +2138,7 @@ int tcp_v4_rcv(struct sk_buff *skb)

if (tcp_checksum_complete(skb)) {
csum_error:
- drop_reason = SKB_DROP_REASON_TCP_CSUM;
+ drop_line = SKB_DROP_LINE;
trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -2149,7 +2149,7 @@ int tcp_v4_rcv(struct sk_buff *skb)

discard_it:
/* Discard frame. */
- kfree_skb_reason(skb, drop_reason);
+ kfree_skb_reason(skb, drop_line);
return 0;

discard_and_relse:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 090360939401..f0715d1072d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2411,9 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__be32 saddr, daddr;
struct net *net = dev_net(skb->dev);
bool refcounted;
- int drop_reason;
+ unsigned int drop_line;

- drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ drop_line = SKB_DROP_LINE_NONE;

/*
* Validate the packet.
@@ -2469,7 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb))
goto csum_error;

- drop_reason = SKB_DROP_REASON_NO_SOCKET;
+ drop_line = SKB_DROP_LINE;
__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

@@ -2477,11 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* Hmm. We got an UDP packet to a port to which we
* don't wanna listen. Ignore it.
*/
- kfree_skb_reason(skb, drop_reason);
+ kfree_skb_reason(skb, drop_line);
return 0;

short_packet:
- drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ drop_line = SKB_DROP_LINE;
net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source),
@@ -2494,7 +2494,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- drop_reason = SKB_DROP_REASON_UDP_CSUM;
+ drop_line = SKB_DROP_LINE;
net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
@@ -2502,7 +2502,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb_reason(skb, drop_reason);
+ kfree_skb_reason(skb, drop_line);
return 0;
}

--
2.17.1


2022-02-03 20:37:28

by Dongli Zhang

[permalink] [raw]
Subject: Re: [PATCH RFC 1/4] net: skb: use line number to trace dropped skb


Hi David,

On 2/3/22 7:48 AM, David Ahern wrote:
> On 2/3/22 8:37 AM, Dongli Zhang wrote:
>> Sometimes the kernel may not directly call kfree_skb() to drop the sk_buff.
>> Instead, it "goto drop" and call kfree_skb() at 'drop'. This make it
>> difficult to track the reason that the sk_buff is dropped.
>>
>> The commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") has
>> introduced the kfree_skb_reason() to help track the reason. However, we may
>> need to define many reasons for each driver/subsystem.
>>
>> To avoid introducing so many new reasons, this is to use line number
>> ("__LINE__") to trace where the sk_buff is dropped. As a result, the reason
>> will be generated automatically.
>>
>
> I don't agree with this approach. It is only marginally better than the
> old kfree_skb that only gave the instruction pointer. That tells you the
> function that dropped the packet, but not why the packet is dropped.
> Adding the line number only makes users have to consult the source code.
>
> When I watch drop monitor for kfree_skb I want to know *why* the packet
> was dropped, not the line number in the source code. e.g., dropmon
> showing OTHERHOST means too many packets are sent to this host (e.g.,
> hypervisor) that do not belong to the host or the VMs running on it, or
> packets have invalid checksum (IP, TCP, UDP). Usable information by
> everyone, not just someone with access to the source code for that
> specific kernel.
>
Thank you very much for the suggestion!

I will not follow this approach. I will introduce new reasons to TUN and TAP
drivers.

Dongli Zhang

2022-02-04 00:38:05

by David Ahern

[permalink] [raw]
Subject: Re: [PATCH RFC 1/4] net: skb: use line number to trace dropped skb

On 2/3/22 8:37 AM, Dongli Zhang wrote:
> Sometimes the kernel may not directly call kfree_skb() to drop the sk_buff.
> Instead, it "goto drop" and call kfree_skb() at 'drop'. This make it
> difficult to track the reason that the sk_buff is dropped.
>
> The commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") has
> introduced the kfree_skb_reason() to help track the reason. However, we may
> need to define many reasons for each driver/subsystem.
>
> To avoid introducing so many new reasons, this is to use line number
> ("__LINE__") to trace where the sk_buff is dropped. As a result, the reason
> will be generated automatically.
>

I don't agree with this approach. It is only marginally better than the
old kfree_skb that only gave the instruction pointer. That tells you the
function that dropped the packet, but not why the packet is dropped.
Adding the line number only makes users have to consult the source code.

When I watch drop monitor for kfree_skb I want to know *why* the packet
was dropped, not the line number in the source code. e.g., dropmon
showing OTHERHOST means too many packets are sent to this host (e.g.,
hypervisor) that do not belong to the host or the VMs running on it, or
packets have invalid checksum (IP, TCP, UDP). Usable information by
everyone, not just someone with access to the source code for that
specific kernel.