From: Menglong Dong <[email protected]>
In this patch series, the interface kfree_skb_with_reason() is
introduced, which is used to collect the skb drop reason and pass
it to the 'kfree_skb' tracepoint. Therefore, 'drop_monitor' or eBPF is
able to monitor abnormal skbs with a detailed reason.
In fact, the ideas behind this patch series come from David
and Steve; I'm just the porter :/
Previous discussion is here:
https://lore.kernel.org/netdev/[email protected]/
https://lore.kernel.org/netdev/[email protected]/
In the first patch, kfree_skb_with_reason() is introduced and
the 'reason' field is added to the 'kfree_skb' tracepoint. In the
second patch, 'kfree_skb()' is replaced with 'kfree_skb_with_reason()'
in tcp_v4_rcv().
Menglong Dong (2):
net: skb: introduce kfree_skb_with_reason()
net: skb: use kfree_skb_with_reason() in tcp_v4_rcv()
include/linux/skbuff.h | 16 ++++++++++++++++
include/trace/events/skb.h | 39 +++++++++++++++++++++++++++++++-------
net/core/dev.c | 3 ++-
net/core/drop_monitor.c | 10 +++++++---
net/core/skbuff.c | 22 ++++++++++++++++++++-
net/ipv4/tcp_ipv4.c | 10 ++++++++--
6 files changed, 86 insertions(+), 14 deletions(-)
--
2.27.0
From: Menglong Dong <[email protected]>
Introduce the interface kfree_skb_with_reason(), which is used to pass
the reason why the skb is dropped to the 'kfree_skb' tracepoint.
Add the 'reason' field to 'trace_kfree_skb' so that users can get
more detailed information about abnormal skbs with 'drop_monitor' or
eBPF.
Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 13 +++++++++++++
include/trace/events/skb.h | 36 +++++++++++++++++++++++++++++-------
net/core/dev.c | 3 ++-
net/core/drop_monitor.c | 10 +++++++---
net/core/skbuff.c | 22 +++++++++++++++++++++-
5 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aa9d42724e20..3620b3ff2154 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -305,6 +305,17 @@ struct sk_buff_head {
struct sk_buff;
+/* The reason for an skb drop, which is used in kfree_skb_with_reason().
+ * Hmm... maybe they should be split into groups?
+ *
+ * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
+ * used to translate the reason to string.
+ */
+enum skb_drop_reason {
+ SKB_DROP_REASON_NOT_SPECIFIED,
+ SKB_DROP_REASON_MAX,
+};
+
/* To allow 64K frame to be packed as single skb without frag_list we
* require 64K/PAGE_SIZE pages plus 1 additional page to allow for
* buffers which do not start on a page boundary.
@@ -1087,6 +1098,8 @@ static inline bool skb_unref(struct sk_buff *skb)
void skb_release_head_state(struct sk_buff *skb);
void kfree_skb(struct sk_buff *skb);
+void kfree_skb_with_reason(struct sk_buff *skb,
+ enum skb_drop_reason reason);
void kfree_skb_list(struct sk_buff *segs);
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
void skb_tx_error(struct sk_buff *skb);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 9e92f22eb086..cab1c08a30cd 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -9,29 +9,51 @@
#include <linux/netdevice.h>
#include <linux/tracepoint.h>
+#define TRACE_SKB_DROP_REASON \
+ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
+ EMe(SKB_DROP_REASON_MAX, HAHA_MAX)
+
+#undef EM
+#undef EMe
+
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+TRACE_SKB_DROP_REASON
+
+#undef EM
+#undef EMe
+#define EM(a, b) { a, #b },
+#define EMe(a, b) { a, #b }
+
+
/*
* Tracepoint for free an sk_buff:
*/
TRACE_EVENT(kfree_skb,
- TP_PROTO(struct sk_buff *skb, void *location),
+ TP_PROTO(struct sk_buff *skb, void *location,
+ enum skb_drop_reason reason),
- TP_ARGS(skb, location),
+ TP_ARGS(skb, location, reason),
TP_STRUCT__entry(
- __field( void *, skbaddr )
- __field( void *, location )
- __field( unsigned short, protocol )
+ __field(void *, skbaddr)
+ __field(void *, location)
+ __field(unsigned short, protocol)
+ __field(enum skb_drop_reason, reason)
),
TP_fast_assign(
__entry->skbaddr = skb;
__entry->location = location;
__entry->protocol = ntohs(skb->protocol);
+ __entry->reason = reason;
),
- TP_printk("skbaddr=%p protocol=%u location=%p",
- __entry->skbaddr, __entry->protocol, __entry->location)
+ TP_printk("skbaddr=%p protocol=%u location=%p reason: %s",
+ __entry->skbaddr, __entry->protocol, __entry->location,
+ __print_symbolic(__entry->reason, TRACE_SKB_DROP_REASON))
);
TRACE_EVENT(consume_skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index 644b9c8be3a8..9464dbf9e3d6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4899,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
trace_consume_skb(skb);
else
- trace_kfree_skb(skb, net_tx_action);
+ trace_kfree_skb(skb, net_tx_action,
+ SKB_DROP_REASON_NOT_SPECIFIED);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 3d0ab2eec916..7b288a121a41 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000;
struct net_dm_alert_ops {
void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
- void *location);
+ void *location,
+ enum skb_drop_reason reason);
void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
int work, int budget);
void (*work_item_func)(struct work_struct *work);
@@ -262,7 +263,9 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
spin_unlock_irqrestore(&data->lock, flags);
}
-static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
+ void *location,
+ enum skb_drop_reason reason)
{
trace_drop_common(skb, location);
}
@@ -490,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
struct sk_buff *skb,
- void *location)
+ void *location,
+ enum skb_drop_reason reason)
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 275f7b8416fe..570dc022a8a1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -770,11 +770,31 @@ void kfree_skb(struct sk_buff *skb)
if (!skb_unref(skb))
return;
- trace_kfree_skb(skb, __builtin_return_address(0));
+ trace_kfree_skb(skb, __builtin_return_address(0),
+ SKB_DROP_REASON_NOT_SPECIFIED);
__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb);
+/**
+ * kfree_skb_with_reason - free an sk_buff with reason
+ * @skb: buffer to free
+ * @reason: reason why this skb is dropped
+ *
+ * The same as kfree_skb() except that this function will pass
+ * the drop reason to 'kfree_skb' tracepoint.
+ */
+void kfree_skb_with_reason(struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ if (!skb_unref(skb))
+ return;
+
+ trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_with_reason);
+
void kfree_skb_list(struct sk_buff *segs)
{
while (segs) {
--
2.30.2
From: Menglong Dong <[email protected]>
Replace kfree_skb() with kfree_skb_with_reason() in tcp_v4_rcv().
The following drop reasons are added:
SKB_DROP_REASON_NO_SOCK
SKB_DROP_REASON_BAD_PACKET
SKB_DROP_REASON_TCP_CSUM
After this patch, 'kfree_skb' event will print message like this:
$ TASK-PID CPU# ||||| TIMESTAMP FUNCTION
$ | | | ||||| | |
<idle>-0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCK
The reason for the skb drop is printed too.
Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 3 +++
include/trace/events/skb.h | 3 +++
net/ipv4/tcp_ipv4.c | 10 ++++++++--
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3620b3ff2154..f85db6c035d1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -313,6 +313,9 @@ struct sk_buff;
*/
enum skb_drop_reason {
SKB_DROP_REASON_NOT_SPECIFIED,
+ SKB_DROP_REASON_NO_SOCK,
+ SKB_DROP_REASON_BAD_PACKET,
+ SKB_DROP_REASON_TCP_CSUM,
SKB_DROP_REASON_MAX,
};
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index cab1c08a30cd..b9ea6b4ed7ec 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -11,6 +11,9 @@
#define TRACE_SKB_DROP_REASON \
EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
+ EM(SKB_DROP_REASON_NO_SOCK, NO_SOCK) \
+ EM(SKB_DROP_REASON_BAD_PACKET, BAD_PACKET) \
+ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
EMe(SKB_DROP_REASON_MAX, HAHA_MAX)
#undef EM
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac10e4cdd8d0..03dc4c79b84b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
+ int drop_reason;
int ret;
+ drop_reason = 0;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1984,8 +1986,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
- if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
+ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
+ drop_reason = SKB_DROP_REASON_BAD_PACKET;
goto bad_packet;
+ }
if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it;
@@ -2124,6 +2128,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
return ret;
no_tcp_socket:
+ drop_reason = SKB_DROP_REASON_NO_SOCK;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
@@ -2131,6 +2136,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (tcp_checksum_complete(skb)) {
csum_error:
+ drop_reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -2141,7 +2147,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
discard_it:
/* Discard frame. */
- kfree_skb(skb);
+ kfree_skb_with_reason(skb, drop_reason);
return 0;
discard_and_relse:
--
2.30.2
On 12/29/21 7:32 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> Replace kfree_skb() with kfree_skb_with_reason() in tcp_v4_rcv().
> Following drop reason are added:
>
> SKB_DROP_REASON_NO_SOCK
> SKB_DROP_REASON_BAD_PACKET
> SKB_DROP_REASON_TCP_CSUM
>
> After this patch, 'kfree_skb' event will print message like this:
>
> $ TASK-PID CPU# ||||| TIMESTAMP FUNCTION
> $ | | | ||||| | |
> <idle>-0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCK
>
> The reason of skb drop is printed too.
>
> Signed-off-by: Menglong Dong <[email protected]>
> ---
> include/linux/skbuff.h | 3 +++
> include/trace/events/skb.h | 3 +++
> net/ipv4/tcp_ipv4.c | 10 ++++++++--
your first patch set was targeting UDP and now you are starting with tcp?
> 3 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3620b3ff2154..f85db6c035d1 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -313,6 +313,9 @@ struct sk_buff;
> */
> enum skb_drop_reason {
> SKB_DROP_REASON_NOT_SPECIFIED,
> + SKB_DROP_REASON_NO_SOCK,
SKB_DROP_REASON_NO_SOCKET
> + SKB_DROP_REASON_BAD_PACKET,
SKB_DROP_REASON_PKT_TOO_SMALL
User oriented messages, not code based.
> + SKB_DROP_REASON_TCP_CSUM,
> SKB_DROP_REASON_MAX,
> };
>
> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
> index cab1c08a30cd..b9ea6b4ed7ec 100644
> --- a/include/trace/events/skb.h
> +++ b/include/trace/events/skb.h
> @@ -11,6 +11,9 @@
>
> #define TRACE_SKB_DROP_REASON \
> EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
> + EM(SKB_DROP_REASON_NO_SOCK, NO_SOCK) \
> + EM(SKB_DROP_REASON_BAD_PACKET, BAD_PACKET) \
> + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
> EMe(SKB_DROP_REASON_MAX, HAHA_MAX)
>
> #undef EM
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index ac10e4cdd8d0..03dc4c79b84b 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
> const struct tcphdr *th;
> bool refcounted;
> struct sock *sk;
> + int drop_reason;
> int ret;
>
> + drop_reason = 0;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
On Thu, Dec 30, 2021 at 4:18 AM David Ahern <[email protected]> wrote:
>
> On 12/29/21 7:32 AM, [email protected] wrote:
> > From: Menglong Dong <[email protected]>
> >
> > Replace kfree_skb() with kfree_skb_with_reason() in tcp_v4_rcv().
> > Following drop reason are added:
> >
> > SKB_DROP_REASON_NO_SOCK
> > SKB_DROP_REASON_BAD_PACKET
> > SKB_DROP_REASON_TCP_CSUM
> >
> > After this patch, 'kfree_skb' event will print message like this:
> >
> > $ TASK-PID CPU# ||||| TIMESTAMP FUNCTION
> > $ | | | ||||| | |
> > <idle>-0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCK
> >
> > The reason of skb drop is printed too.
> >
> > Signed-off-by: Menglong Dong <[email protected]>
> > ---
> > include/linux/skbuff.h | 3 +++
> > include/trace/events/skb.h | 3 +++
> > net/ipv4/tcp_ipv4.c | 10 ++++++++--
>
> your first patch set was targeting UDP and now you are starting with tcp?
Yeah, I think TCP is used more, so it can be a good starting point. In
the end, all of the common protocols are my target.
>
>
> > 3 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 3620b3ff2154..f85db6c035d1 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -313,6 +313,9 @@ struct sk_buff;
> > */
> > enum skb_drop_reason {
> > SKB_DROP_REASON_NOT_SPECIFIED,
> > + SKB_DROP_REASON_NO_SOCK,
>
> SKB_DROP_REASON_NO_SOCKET
>
> > + SKB_DROP_REASON_BAD_PACKET,
>
> SKB_DROP_REASON_PKT_TOO_SMALL
>
> User oriented messages, not code based.
>
OK, got it!
Thanks!
Menglong Dong
>
> > + SKB_DROP_REASON_TCP_CSUM,
> > SKB_DROP_REASON_MAX,
> > };
> >
> > diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
> > index cab1c08a30cd..b9ea6b4ed7ec 100644
> > --- a/include/trace/events/skb.h
> > +++ b/include/trace/events/skb.h
> > @@ -11,6 +11,9 @@
> >
> > #define TRACE_SKB_DROP_REASON \
> > EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
> > + EM(SKB_DROP_REASON_NO_SOCK, NO_SOCK) \
> > + EM(SKB_DROP_REASON_BAD_PACKET, BAD_PACKET) \
> > + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
> > EMe(SKB_DROP_REASON_MAX, HAHA_MAX)
> >
> > #undef EM
> > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > index ac10e4cdd8d0..03dc4c79b84b 100644
> > --- a/net/ipv4/tcp_ipv4.c
> > +++ b/net/ipv4/tcp_ipv4.c
> > @@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
> > const struct tcphdr *th;
> > bool refcounted;
> > struct sock *sk;
> > + int drop_reason;
> > int ret;
> >
> > + drop_reason = 0;
>
> drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
>