2022-02-15 13:25:36

by Menglong Dong

Subject: [PATCH net-next 00/19] net: add skb drop reasons for TCP, IP, dev and neigh

From: Menglong Dong <[email protected]>

In this series of patches, reasons for skb drops are added to the TCP,
IP, dev and neigh layers.

For the TCP layer, the TCP data receive and enqueue paths are covered.
TCP state processing is more complex, however, as I find it hard to
report skb drop reasons back to the place where the skb is actually
freed. For example, when an skb is dropped in tcp_rcv_state_process(),
the drop can originate in the call to tcp_v4_conn_request(), and it is
hard to return a drop reason from tcp_v4_conn_request(). So I skip such
cases for the moment.

For the IP layer, skb drop reasons are added to the packet output path.
The reasons there are not complex, so I didn't split the commits by
function.

For the neighbour part, SKB_DROP_REASON_NEIGH_FAILED and
SKB_DROP_REASON_NEIGH_QUEUEFULL are added.

For the link layer, reasons are added to both the packet input and
output paths.

The number of patches in this series may be a bit too large; maybe I
should combine some of them? For example, squash the dev patches into
one.

Menglong Dong (19):
net: tcp: introduce tcp_drop_reason()
net: tcp: add skb drop reasons to tcp_v4_rcv()
net: tcp: use kfree_skb_reason() for tcp_v6_rcv()
net: tcp: add skb drop reasons to tcp_v{4,6}_inbound_md5_hash()
net: tcp: add skb drop reasons to tcp_add_backlog()
net: tcp: use kfree_skb_reason() for tcp_v{4,6}_do_rcv()
net: tcp: use tcp_drop_reason() for tcp_rcv_established()
net: tcp: use tcp_drop_reason() for tcp_data_queue()
net: tcp: use tcp_drop_reason() for tcp_data_queue_ofo()
net: ip: add skb drop reasons during ip outputting
net: neigh: use kfree_skb_reason() for __neigh_event_send()
net: neigh: add skb drop reasons to arp_error_report()
net: dev: use kfree_skb_reason() for sch_handle_egress()
net: skb: introduce the function kfree_skb_list_reason()
net: dev: add skb drop reasons to __dev_xmit_skb()
net: dev: use kfree_skb_reason() for enqueue_to_backlog()
net: dev: use kfree_skb_reason() for do_xdp_generic()
net: dev: use kfree_skb_reason() for sch_handle_ingress()
net: dev: use kfree_skb_reason() for __netif_receive_skb_core()

include/linux/skbuff.h | 82 +++++++++++++++++++++++++++++++++++++-
include/net/tcp.h | 3 +-
include/trace/events/skb.h | 21 ++++++++++
net/core/dev.c | 25 +++++++-----
net/core/neighbour.c | 4 +-
net/core/skbuff.c | 7 ++--
net/ipv4/arp.c | 2 +-
net/ipv4/ip_output.c | 6 +--
net/ipv4/tcp_input.c | 45 ++++++++++++++++-----
net/ipv4/tcp_ipv4.c | 36 ++++++++++++-----
net/ipv6/ip6_output.c | 6 +--
net/ipv6/tcp_ipv6.c | 42 ++++++++++++++-----
12 files changed, 227 insertions(+), 52 deletions(-)

--
2.34.1


2022-02-15 13:37:33

by Menglong Dong

Subject: [PATCH net-next 05/19] net: tcp: add skb drop reasons to tcp_add_backlog()

From: Menglong Dong <[email protected]>

Pass the address of drop_reason to tcp_add_backlog() so that it can
store the reason for the skb drop when it fails. The following drop
reason is introduced:

SKB_DROP_REASON_SOCKET_BACKLOG
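
For readers less familiar with the convention, here is a minimal
userspace sketch of the pattern (illustrative only, not kernel code):
the callee records why it wants the packet dropped through an
out-parameter, and the caller's single discard path frees the buffer
with whatever reason was stored.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_SOCKET_BACKLOG,
};

struct buf {
	char data[64];
};

/* mock of tcp_add_backlog(): true means "caller must drop the buffer" */
static bool add_backlog(struct buf *b, unsigned int qlen,
			unsigned int limit, enum drop_reason *reason)
{
	if (qlen >= limit) {
		*reason = DROP_SOCKET_BACKLOG;
		return true;
	}
	return false;	/* queued, ownership transferred */
}

int main(void)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;
	struct buf *b = calloc(1, sizeof(*b));

	if (add_backlog(b, 128, 64, &reason)) {
		printf("dropped, reason=%d\n", reason);
		free(b);	/* single free point, reason already set */
	}
	return 0;
}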

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 4 ++++
include/net/tcp.h | 3 ++-
include/trace/events/skb.h | 1 +
net/ipv4/tcp_ipv4.c | 7 +++++--
net/ipv6/tcp_ipv6.c | 2 +-
5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aea46b38cffa..9a4424ceb7cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -353,6 +353,10 @@ enum skb_drop_reason {
* expecting one
*/
SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong */
+ SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket
+ * backlog (see
+ * LINUX_MIB_TCPBACKLOGDROP)
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eff2487d972d..04f4650e0ff0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1367,7 +1367,8 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
__skb_checksum_complete(skb);
}

-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);

#ifdef CONFIG_INET
void __sk_defer_free_flush(struct sock *sk);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 46c06b0be850..bfccd77e9071 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -31,6 +31,7 @@
EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \
TCP_MD5UNEXPECTED) \
EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \
+ EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3e7ab605dddc..15ef1bcff84f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1811,7 +1811,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
return 0;
}

-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
u32 limit, tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
@@ -1837,6 +1838,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
trace_tcp_bad_csum(skb);
+ *reason = SKB_DROP_REASON_TCP_CSUM;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
return true;
@@ -1925,6 +1927,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)

if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
+ *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
return true;
}
@@ -2133,7 +2136,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!sock_owned_by_user(sk)) {
ret = tcp_v4_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dfefbc1eac5d..15090fe5af90 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1784,7 +1784,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
if (!sock_owned_by_user(sk)) {
ret = tcp_v6_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
--
2.34.1

2022-02-15 14:48:17

by Menglong Dong

Subject: [PATCH net-next 14/19] net: skb: introduce the function kfree_skb_list_reason()

From: Menglong Dong <[email protected]>

To report the reasons for skb drops, introduce the function
kfree_skb_list_reason() and make kfree_skb_list() an inline wrapper
around it. This function will be used in the next commit, in
__dev_xmit_skb().
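
As a minimal userspace sketch of what the helper does (illustrative
only, not kernel code): walk a singly-linked list and free every
element with one shared drop reason, keeping the old entry point as a
thin wrapper that passes "not specified".

#include <stdlib.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_QDISC_DROP,
};

struct buf {
	struct buf *next;
};

static void free_buf_reason(struct buf *b, enum drop_reason reason)
{
	(void)reason;	/* a real implementation would report this */
	free(b);
}

static void free_buf_list_reason(struct buf *segs, enum drop_reason reason)
{
	while (segs) {
		struct buf *next = segs->next;

		free_buf_reason(segs, reason);
		segs = next;
	}
}

/* legacy entry point keeps working for callers with nothing to report */
static inline void free_buf_list(struct buf *segs)
{
	free_buf_list_reason(segs, DROP_NOT_SPECIFIED);
}

int main(void)
{
	struct buf *a = calloc(1, sizeof(*a));
	struct buf *b = calloc(1, sizeof(*b));

	a->next = b;
	free_buf_list(a);
	return 0;
}

The follow-up change in __dev_xmit_skb() is not shown in this patch;
presumably it passes a qdisc-related reason such as
SKB_DROP_REASON_QDISC_DROP (which appears later in the series) to the
new helper.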

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 8 +++++++-
net/core/skbuff.c | 7 ++++---
2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e19806d9818..dc3794b60b1c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1196,10 +1196,16 @@ static inline void kfree_skb(struct sk_buff *skb)
}

void skb_release_head_state(struct sk_buff *skb);
-void kfree_skb_list(struct sk_buff *segs);
+void kfree_skb_list_reason(struct sk_buff *segs,
+ enum skb_drop_reason reason);
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
void skb_tx_error(struct sk_buff *skb);

+static inline void kfree_skb_list(struct sk_buff *segs)
+{
+ kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED);
+}
+
#ifdef CONFIG_TRACEPOINTS
void consume_skb(struct sk_buff *skb);
#else
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9d0388bed0c1..f0c6207f5de7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -777,16 +777,17 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
}
EXPORT_SYMBOL(kfree_skb_reason);

-void kfree_skb_list(struct sk_buff *segs)
+void kfree_skb_list_reason(struct sk_buff *segs,
+ enum skb_drop_reason reason)
{
while (segs) {
struct sk_buff *next = segs->next;

- kfree_skb(segs);
+ kfree_skb_reason(segs, reason);
segs = next;
}
}
-EXPORT_SYMBOL(kfree_skb_list);
+EXPORT_SYMBOL(kfree_skb_list_reason);

/* Dump skb information and contents.
*
--
2.34.1

2022-02-15 15:06:33

by Menglong Dong

Subject: [PATCH net-next 01/19] net: tcp: introduce tcp_drop_reason()

From: Menglong Dong <[email protected]>

In the TCP protocol, tcp_drop() is used to free an skb when it needs
to be dropped. To make use of kfree_skb_reason() and collect drop
reasons, introduce the function tcp_drop_reason().

tcp_drop_reason() ultimately calls kfree_skb_reason() and passes the
drop reason to the 'kfree_skb' tracepoint.

PS: __kfree_skb() was used in tcp_drop(); I'm not sure whether it's ok
to replace it with kfree_skb_reason().
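
For background on the question in the PS, a rough userspace analogy of
the difference (illustrative only, not the actual kernel code): the
kfree_skb()/kfree_skb_reason() path is reference-counted and fires the
'kfree_skb' tracepoint, while __kfree_skb() frees unconditionally and
silently, which is fine when the caller knows it holds the only
reference.

#include <stdlib.h>

struct buf {
	int users;	/* analogous to skb->users */
	char data[64];
};

static void __free_buf(struct buf *b)	/* like __kfree_skb(): no checks */
{
	free(b);
}

static void free_buf(struct buf *b)	/* like kfree_skb(): refcounted */
{
	if (--b->users > 0)
		return;		/* someone else still holds a reference */
	__free_buf(b);
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	b->users = 1;
	free_buf(b);	/* last reference, so it is actually freed */
	return 0;
}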

Signed-off-by: Menglong Dong <[email protected]>
---
net/ipv4/tcp_input.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af94a6d22a9d..e3811afd1756 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4684,10 +4684,19 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}

-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
- __kfree_skb(skb);
+ /* why __kfree_skb() used here before, other than kfree_skb()?
+ * confusing......
+ */
+ kfree_skb_reason(skb, reason);
+}
+
+static inline void tcp_drop(struct sock *sk, struct sk_buff *skb)
+{
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

/* This one checks to see if we can put data from the
--
2.34.1

2022-02-15 15:33:09

by Menglong Dong

Subject: [PATCH net-next 04/19] net: tcp: add skb drop reasons to tcp_v{4,6}_inbound_md5_hash()

From: Menglong Dong <[email protected]>

Pass the address of the drop reason to tcp_v4_inbound_md5_hash() and
tcp_v6_inbound_md5_hash() so they can store the reason for the skb drop
when these functions fail. The drop reason can then be passed to
kfree_skb_reason() when the skb is freed.

The following drop reasons are added:

SKB_DROP_REASON_TCP_MD5NOTFOUND
SKB_DROP_REASON_TCP_MD5UNEXPECTED
SKB_DROP_REASON_TCP_MD5FAILURE
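
A minimal userspace sketch of how the three reasons partition the MD5
outcomes (illustrative only, not kernel code): a segment may be missing
a hash we expected, carry a hash we did not expect, or carry a hash
that fails verification.

#include <stdbool.h>
#include <stdio.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_TCP_MD5NOTFOUND,	/* no MD5 hash and one expected */
	DROP_TCP_MD5UNEXPECTED,	/* MD5 hash present but not expected */
	DROP_TCP_MD5FAILURE,	/* MD5 hash present but wrong */
};

/* returns true when the segment must be dropped and records why */
static bool md5_check(bool hash_expected, bool hash_present,
		      bool hash_valid, enum drop_reason *reason)
{
	if (hash_expected && !hash_present) {
		*reason = DROP_TCP_MD5NOTFOUND;
		return true;
	}
	if (!hash_expected && hash_present) {
		*reason = DROP_TCP_MD5UNEXPECTED;
		return true;
	}
	if (hash_expected && !hash_valid) {
		*reason = DROP_TCP_MD5FAILURE;
		return true;
	}
	return false;
}

int main(void)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (md5_check(true, false, false, &reason))
		printf("drop, reason=%d\n", reason);	/* MD5NOTFOUND */
	return 0;
}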

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 7 +++++++
include/trace/events/skb.h | 4 ++++
net/ipv4/tcp_ipv4.c | 13 +++++++++----
net/ipv6/tcp_ipv6.c | 11 ++++++++---
4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a5adbf6b51e8..aea46b38cffa 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -346,6 +346,13 @@ enum skb_drop_reason {
* udp packet drop out of
* udp_memory_allocated.
*/
+ SKB_DROP_REASON_TCP_MD5NOTFOUND, /* No MD5 hash and one
+ * expected
+ */
+ SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not
+ * expecting one
+ */
+ SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index cfcfd26399f7..46c06b0be850 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -27,6 +27,10 @@
EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \
EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \
EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \
+ EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \
+ EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \
+ TCP_MD5UNEXPECTED) \
+ EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a93921fb498f..3e7ab605dddc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1412,7 +1412,8 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb,
- int dif, int sdif)
+ int dif, int sdif,
+ enum skb_drop_reason *reason)
{
#ifdef CONFIG_TCP_MD5SIG
/*
@@ -1445,11 +1446,13 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
return false;

if (hash_expected && !hash_location) {
+ *reason = SKB_DROP_REASON_TCP_MD5NOTFOUND;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}

if (!hash_expected && hash_location) {
+ *reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
@@ -1462,6 +1465,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
NULL, skb);

if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ *reason = SKB_DROP_REASON_TCP_MD5FAILURE;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
&iph->saddr, ntohs(th->source),
@@ -1971,13 +1975,13 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ enum skb_drop_reason drop_reason;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
- int drop_reason;
int ret;

drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -2025,7 +2029,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct sock *nsk;

sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif,
+ &drop_reason))) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -2099,7 +2104,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto discard_and_relse;
}

- if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
+ if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif, &drop_reason))
goto discard_and_relse;

nf_reset_ct(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 402ffbacc371..dfefbc1eac5d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -775,7 +775,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb,
- int dif, int sdif)
+ int dif, int sdif,
+ enum skb_drop_reason *reason)
{
#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
@@ -798,11 +799,13 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;

if (hash_expected && !hash_location) {
+ *reason = SKB_DROP_REASON_TCP_MD5NOTFOUND;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}

if (!hash_expected && hash_location) {
+ *reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
@@ -813,6 +816,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
NULL, skb);

if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ *reason = SKB_DROP_REASON_TCP_MD5FAILURE;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
genhash ? "failed" : "mismatch",
@@ -1681,7 +1685,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
struct sock *nsk;

sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif,
+ &drop_reason)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1752,7 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto discard_and_relse;
}

- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif, &drop_reason))
goto discard_and_relse;

if (tcp_filter(sk, skb)) {
--
2.34.1

2022-02-15 15:38:17

by Menglong Dong

Subject: [PATCH net-next 09/19] net: tcp: use tcp_drop_reason() for tcp_data_queue_ofo()

From: Menglong Dong <[email protected]>

Replace tcp_drop() used in tcp_data_queue_ofo() with tcp_drop_reason().
The following drop reason is introduced:

SKB_DROP_REASON_TCP_OFOMERGE

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 3 +++
include/trace/events/skb.h | 1 +
net/ipv4/tcp_input.c | 10 ++++++----
3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 62a0d7d78f6f..73ed01d87e43 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -371,6 +371,9 @@ enum skb_drop_reason {
* the right edges of receive
* window
*/
+ SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in
+ * the ofo queue.
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index cc1c8f7eaf72..2ab7193313aa 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -36,6 +36,7 @@
EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \
EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \
EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \
+ EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c042711fb5a2..cb6ad47733f1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4782,7 +4782,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
sk->sk_data_ready(sk);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
return;
}

@@ -4845,7 +4845,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb,
+ SKB_DROP_REASON_TCP_OFOMERGE);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4864,7 +4865,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1,
+ SKB_DROP_REASON_TCP_OFOMERGE);
goto merge_right;
}
} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4892,7 +4894,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
}
/* If there is no skb after us, we are the last_skb ! */
if (!skb1)
--
2.34.1

2022-02-15 15:45:24

by Menglong Dong

Subject: [PATCH net-next 16/19] net: dev: use kfree_skb_reason() for enqueue_to_backlog()

From: Menglong Dong <[email protected]>

Replace kfree_skb() used in enqueue_to_backlog() with
kfree_skb_reason(). The skb drop reason SKB_DROP_REASON_CPU_BACKLOG is
introduced for the case of failing to enqueue the skb to the per-CPU
backlog queue. The underlying cause can be a full backlog queue (see
netdev_max_backlog in net.rst) or the RPS flow limit, and I don't think
we need to distinguish between them further.
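
A minimal userspace sketch of the point above (illustrative only, not
kernel code): two different causes, a full backlog queue or the flow
limit, fold into the single CPU_BACKLOG reason, while the reason
variable stays "not specified" for any other failure sharing the same
error path.

#include <stdbool.h>
#include <stdio.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_CPU_BACKLOG,
};

static bool flow_limit_hit(unsigned int qlen)
{
	return qlen > 750;	/* stand-in for the RPS flow limit check */
}

static int enqueue(unsigned int qlen, unsigned int max_backlog)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (qlen <= max_backlog && !flow_limit_hit(qlen))
		return 0;	/* enqueued to the per-CPU queue */

	reason = DROP_CPU_BACKLOG;	/* either cause, same reason */
	printf("dropped, reason=%d\n", reason);
	return -1;
}

int main(void)
{
	enqueue(100, 1000);	/* queued */
	enqueue(2000, 1000);	/* dropped: queue full */
	return 0;
}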

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 6 ++++++
include/trace/events/skb.h | 1 +
net/core/dev.c | 6 +++++-
3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0f7e5177dbaf..d59fdcd98278 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -404,6 +404,12 @@ enum skb_drop_reason {
* outputting (failed to enqueue to
* current qdisc)
*/
+ SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to
+ * the per CPU backlog queue. This
+ * can be caused by backlog queue
+ * full (see netdev_max_backlog in
+ * net.rst) or RPS flow limit
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 356bea7567b5..a1c235daf23b 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -45,6 +45,7 @@
EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \
EM(SKB_DROP_REASON_QDISC_EGRESS, QDISC_EGRESS) \
EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \
+ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/core/dev.c b/net/core/dev.c
index 55e890964fe2..8fee7adfca88 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4520,10 +4520,12 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
unsigned int *qtail)
{
+ enum skb_drop_reason reason;
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;

+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
sd = &per_cpu(softnet_data, cpu);

local_irq_save(flags);
@@ -4550,6 +4552,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
____napi_schedule(sd, &sd->backlog);
}
goto enqueue;
+ } else {
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
}

drop:
@@ -4559,7 +4563,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
local_irq_restore(flags);

atomic_long_inc(&skb->dev->rx_dropped);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}

--
2.34.1

2022-02-15 15:54:28

by Menglong Dong

Subject: [PATCH net-next 11/19] net: neigh: use kfree_skb_reason() for __neigh_event_send()

From: Menglong Dong <[email protected]>

Replace kfree_skb() used in __neigh_event_send() with
kfree_skb_reason(). The following drop reasons are added:

SKB_DROP_REASON_NEIGH_FAILED
SKB_DROP_REASON_NEIGH_QUEUEFULL

The two reasons above should cover the hot paths for skb drops in the
neighbour layer.

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 9 +++++++++
include/trace/events/skb.h | 2 ++
net/core/neighbour.c | 4 ++--
3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c7394b4790a0..136af29be256 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -387,6 +387,15 @@ enum skb_drop_reason {
* see the doc for disable_ipv6
* in ip-sysctl.rst for detail
*/
+ SKB_DROP_REASON_NEIGH_FAILED, /* dropped as the state of
+ * neighbour is NUD_FAILED
+ */
+ SKB_DROP_REASON_NEIGH_QUEUEFULL, /* the skbs that waiting
+ * for sending on the queue
+ * of neigh->arp_queue is
+ * full, and the skbs on the
+ * tail will be dropped
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 47dedef7b6b8..dd06366ded4a 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -41,6 +41,8 @@
EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \
BPF_CGROUP_EGRESS) \
EM(SKB_DROP_REASON_IPV6DSIABLED, IPV6DSIABLED) \
+ EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \
+ EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ec0bf737b076..c353834e8fa9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1171,7 +1171,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);

- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1193,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
--
2.34.1

2022-02-15 16:21:04

by Jakub Kicinski

Subject: Re: [PATCH net-next 00/19] net: add skb drop reasons for TCP, IP, dev and neigh

On Tue, 15 Feb 2022 19:27:53 +0800 [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> In this series patches, reasons for skb drops are added to TCP, IP, dev
> and neigh.
>
> For TCP layer, the path of TCP data receive and enqueue are considered.
> However, it's more complex for TCP state processing, as I find that it's
> hard to report skb drop reasons to where it is freed. For example,
> when skb is dropped in tcp_rcv_state_process(), the reason can be caused
> by the call of tcp_v4_conn_request(), and it's hard to return a drop
> reason from tcp_v4_conn_request(). So I just skip such case for this
> moment.
>
> For IP layer, skb drop reasons are added to the packet outputting path.
> Seems the reasons are not complex, so I didn't split the commits by
> functions.
>
> For neighbour part, SKB_DROP_REASON_NEIGH_FAILED and
> SKB_DROP_REASON_NEIGH_QUEUEFULL are added.
>
> For link layer, reasons are added for both packet inputting and
> outputting path.
>
> The amount of patches in this series seems a bit too many, maybe I should
> join some of them? For example, combine the patches of dev to one.

This series does not apply cleanly.

There's no reason to send 19 patches at a time. Please try to send
smaller series that are easier to review, under 10 patches
preferably, certainly under 15.

2022-02-15 19:15:22

by Menglong Dong

Subject: [PATCH net-next 08/19] net: tcp: use tcp_drop_reason() for tcp_data_queue()

From: Menglong Dong <[email protected]>

Replace tcp_drop() used in tcp_data_queue() with tcp_drop_reason().
The following drop reasons are introduced:

SKB_DROP_REASON_TCP_ZEROWINDOW
SKB_DROP_REASON_TCP_OLD_DATA
SKB_DROP_REASON_TCP_OVERWINDOW

SKB_DROP_REASON_TCP_OLD_DATA is used for the case where the end_seq of
the skb is not after the left edge (rcv_nxt) of the receive window,
i.e. the data has already been received, possibly because of a spurious
retransmission. (Maybe there is a better name?)
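
A toy userspace sketch of how the three reasons map onto the receive
window [rcv_nxt, rcv_nxt + rcv_wnd) (illustrative only; the kernel uses
before()/after(), which also handle sequence-number wraparound):

#include <stdint.h>
#include <stdio.h>

enum drop_reason {
	DROP_NONE,
	DROP_TCP_ZEROWINDOW,
	DROP_TCP_OLD_DATA,
	DROP_TCP_OVERWINDOW,
};

static enum drop_reason classify(uint32_t seq, uint32_t end_seq,
				 uint32_t rcv_nxt, uint32_t rcv_wnd)
{
	if (seq == rcv_nxt)			/* in-order data */
		return rcv_wnd ? DROP_NONE : DROP_TCP_ZEROWINDOW;
	if (end_seq <= rcv_nxt)			/* entirely old: retransmit */
		return DROP_TCP_OLD_DATA;
	if (seq >= rcv_nxt + rcv_wnd)		/* beyond the right edge */
		return DROP_TCP_OVERWINDOW;
	if (seq < rcv_nxt && !rcv_wnd)		/* partial old data, no window */
		return DROP_TCP_ZEROWINDOW;
	return DROP_NONE;			/* partial/out-of-order, kept */
}

int main(void)
{
	/* window is [1000, 1500) */
	printf("%d\n", classify(600, 900, 1000, 500));	 /* TCP_OLD_DATA */
	printf("%d\n", classify(1600, 1700, 1000, 500)); /* TCP_OVERWINDOW */
	return 0;
}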

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 13 +++++++++++++
include/trace/events/skb.h | 3 +++
net/ipv4/tcp_input.c | 13 +++++++++++--
3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dcf9d8bd0079..62a0d7d78f6f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -358,6 +358,19 @@ enum skb_drop_reason {
* LINUX_MIB_TCPBACKLOGDROP)
*/
SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */
+ SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero,
+ * see LINUX_MIB_TCPZEROWINDOWDROP
+ */
+ SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already
+ * received before (spurious retrans
+ * may happened), see
+ * LINUX_MIB_DELAYEDACKLOST
+ */
+ SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window,
+ * the seq of the first byte exceed
+ * the right edges of receive
+ * window
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index d332e7313a61..cc1c8f7eaf72 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -33,6 +33,9 @@
EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \
EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \
EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \
+ EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \
+ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \
+ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8cb0ea34aa49..c042711fb5a2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4991,6 +4991,7 @@ void tcp_data_ready(struct sock *sk)
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ enum skb_drop_reason reason;
bool fragstolen;
int eaten;

@@ -5009,6 +5010,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);

+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
tp->rx_opt.dsack = 0;

/* Queue data for delivery to the user.
@@ -5017,6 +5019,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
if (tcp_receive_window(tp) == 0) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
@@ -5026,6 +5029,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (skb_queue_len(&sk->sk_receive_queue) == 0)
sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
sk->sk_data_ready(sk);
goto drop;
@@ -5062,6 +5066,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */
+ reason = SKB_DROP_REASON_TCP_OLD_DATA;
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

@@ -5069,13 +5074,16 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_schedule_ack(sk);
drop:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
return;
}

/* Out of window. F.e. zero window probe. */
- if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ if (!before(TCP_SKB_CB(skb)->seq,
+ tp->rcv_nxt + tcp_receive_window(tp))) {
+ reason = SKB_DROP_REASON_TCP_OVERWINDOW;
goto out_of_window;
+ }

if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet, seq < rcv_next < end_seq */
@@ -5085,6 +5093,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
* remembering D-SACK for its head made in previous line.
*/
if (!tcp_receive_window(tp)) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
--
2.34.1

2022-02-15 20:49:33

by Menglong Dong

Subject: [PATCH net-next 17/19] net: dev: use kfree_skb_reason() for do_xdp_generic()

From: Menglong Dong <[email protected]>

Replace kfree_skb() used in do_xdp_generic() with kfree_skb_reason().
The drop reason SKB_DROP_REASON_XDP is introduced for this case.

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 1 +
include/trace/events/skb.h | 1 +
net/core/dev.c | 2 +-
3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d59fdcd98278..79b24d5f491d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -410,6 +410,7 @@ enum skb_drop_reason {
* full (see netdev_max_backlog in
* net.rst) or RPS flow limit
*/
+ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index a1c235daf23b..7bc46414a81b 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -46,6 +46,7 @@
EM(SKB_DROP_REASON_QDISC_EGRESS, QDISC_EGRESS) \
EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \
EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \
+ EM(SKB_DROP_REASON_XDP, XDP) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/core/dev.c b/net/core/dev.c
index 8fee7adfca88..a2548b7f2708 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4783,7 +4783,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
--
2.34.1

2022-02-15 20:52:15

by Menglong Dong

Subject: [PATCH net-next 10/19] net: ip: add skb drop reasons during ip outputting

From: Menglong Dong <[email protected]>

As kfree_skb() is not called in many places on the IP layer packet
output path, we do this job (adding reasons for skb drops) in one go.

kfree_skb() is replaced by kfree_skb_reason() in the following
functions:

__ip_queue_xmit(), ip_finish_output(), ip_mc_finish_output(),
ip6_output(), ip6_finish_output(), ip6_finish_output2()

and the following drop reasons are added:

SKB_DROP_REASON_IP_OUTNOROUTES
SKB_DROP_REASON_BPF_CGROUP_EGRESS
SKB_DROP_REASON_IPV6DSIABLED

Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 13 +++++++++++++
include/trace/events/skb.h | 4 ++++
net/ipv4/ip_output.c | 6 +++---
net/ipv6/ip6_output.c | 6 +++---
4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 73ed01d87e43..c7394b4790a0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -374,6 +374,19 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in
* the ofo queue.
*/
+ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed during
+ * packet outputting
+ */
+ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by eBPF program
+ * with type of BPF_PROG_TYPE_CGROUP_SKB
+ * and attach type of
+ * BPF_CGROUP_INET_EGRESS
+ * during packet sending
+ */
+ SKB_DROP_REASON_IPV6DSIABLED, /* IPv6 is disabled on the device,
+ * see the doc for disable_ipv6
+ * in ip-sysctl.rst for detail
+ */
SKB_DROP_REASON_MAX,
};

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 2ab7193313aa..47dedef7b6b8 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -37,6 +37,10 @@
EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \
EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \
EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \
+ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \
+ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \
+ BPF_CGROUP_EGRESS) \
+ EM(SKB_DROP_REASON_IPV6DSIABLED, IPV6DSIABLED) \
EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0c0574eb5f5b..df549b7415fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -317,7 +317,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
case NET_XMIT_CN:
return __ip_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -337,7 +337,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
case NET_XMIT_SUCCESS:
break;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}

@@ -536,7 +536,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
no_route:
rcu_read_unlock();
IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
return -EHOSTUNREACH;
}
EXPORT_SYMBOL(__ip_queue_xmit);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c6c971ce0a5..4cd9e5fd25e4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
rcu_read_unlock_bh();

IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
return -EINVAL;
}

@@ -202,7 +202,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -217,7 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)

if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DSIABLED);
return 0;
}

--
2.34.1

2022-02-16 00:30:18

by Eric Dumazet

Subject: Re: [PATCH net-next 01/19] net: tcp: introduce tcp_drop_reason()

On Tue, Feb 15, 2022 at 3:30 AM <[email protected]> wrote:
>
> From: Menglong Dong <[email protected]>
>
> For TCP protocol, tcp_drop() is used to free the skb when it needs
> to be dropped. To make use of kfree_skb_reason() and collect drop
> reasons, introduce the function tcp_drop_reason().
>
> tcp_drop_reason() will finally call kfree_skb_reason() and pass the
> drop reason to 'kfree_skb' tracepoint.
>
> PS: __kfree_skb() was used in tcp_drop(), I'm not sure if it's ok
> to replace it with kfree_skb_reason().
>
> Signed-off-by: Menglong Dong <[email protected]>
> ---
> net/ipv4/tcp_input.c | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index af94a6d22a9d..e3811afd1756 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4684,10 +4684,19 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
> return res;
> }
>
> -static void tcp_drop(struct sock *sk, struct sk_buff *skb)
> +static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> + enum skb_drop_reason reason)
> {
> sk_drops_add(sk, skb);
> - __kfree_skb(skb);
> + /* why __kfree_skb() used here before, other than kfree_skb()?
> + * confusing......

Do not add comments like that if you do not know the difference...

__kfree_skb() is used by TCP stack because it owns skb in receive
queues, and avoids touching skb->users
because it must be one already.

(We made sure not using skb_get() in TCP)

It seems fine to use kfree_skb() in tcp_drop(), it is hardly fast
path, and the added cost is pure noise.

2022-02-16 03:36:23

by David Ahern

Subject: Re: [PATCH net-next 00/19] net: add skb drop reasons for TCP, IP, dev and neigh

On 2/15/22 9:04 AM, Jakub Kicinski wrote:
> There's no reason to send 19 patches at a time. Please try to send
> smaller series, that's are easier to review, under 10 patches
> preferably, certainly under 15.

+1. It takes time to review code paths and make sure the changes are
correct.

Send the first 9 as a set; those target the TCP stack. Then wait for
them to be merged before sending more.

2022-02-16 06:38:01

by David Ahern

Subject: Re: [PATCH net-next 01/19] net: tcp: introduce tcp_drop_reason()

On 2/15/22 10:34 AM, Eric Dumazet wrote:
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index af94a6d22a9d..e3811afd1756 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -4684,10 +4684,19 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
>> return res;
>> }
>>
>> -static void tcp_drop(struct sock *sk, struct sk_buff *skb)
>> +static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
>> + enum skb_drop_reason reason)
>> {
>> sk_drops_add(sk, skb);
>> - __kfree_skb(skb);
>> + /* why __kfree_skb() used here before, other than kfree_skb()?
>> + * confusing......
>
> Do not add comments like that if you do not know the difference...
>
> __kfree_skb() is used by TCP stack because it owns skb in receive
> queues, and avoids touching skb->users
> because it must be one already.

and it bypasses the kfree_skb tracepoint, which seems to be by design.

2022-02-16 06:38:37

by Menglong Dong

Subject: Re: [PATCH net-next 00/19] net: add skb drop reasons for TCP, IP, dev and neigh

On Wed, Feb 16, 2022 at 12:09 AM David Ahern <[email protected]> wrote:
>
> On 2/15/22 9:04 AM, Jakub Kicinski wrote:
> > There's no reason to send 19 patches at a time. Please try to send
> > smaller series, that's are easier to review, under 10 patches
> > preferably, certainly under 15.
>
> +1. It takes time to review code paths and make sure the changes are
> correct.
>
> Send the first 9 as set; those target the TCP stack and then wait for
> them to be merged before sending more.

Ok, I'll keep the number of patches at a reasonable level, thanks!

2022-02-16 07:08:34

by Menglong Dong

Subject: Re: [PATCH net-next 01/19] net: tcp: introduce tcp_drop_reason()

On Wed, Feb 16, 2022 at 1:34 AM Eric Dumazet <[email protected]> wrote:
>
> On Tue, Feb 15, 2022 at 3:30 AM <[email protected]> wrote:
> >
> > From: Menglong Dong <[email protected]>
> >
> > For TCP protocol, tcp_drop() is used to free the skb when it needs
> > to be dropped. To make use of kfree_skb_reason() and collect drop
> > reasons, introduce the function tcp_drop_reason().
> >
> > tcp_drop_reason() will finally call kfree_skb_reason() and pass the
> > drop reason to 'kfree_skb' tracepoint.
> >
> > PS: __kfree_skb() was used in tcp_drop(), I'm not sure if it's ok
> > to replace it with kfree_skb_reason().
> >
> > Signed-off-by: Menglong Dong <[email protected]>
> > ---
> > net/ipv4/tcp_input.c | 13 +++++++++++--
> > 1 file changed, 11 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index af94a6d22a9d..e3811afd1756 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -4684,10 +4684,19 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
> > return res;
> > }
> >
> > -static void tcp_drop(struct sock *sk, struct sk_buff *skb)
> > +static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> > + enum skb_drop_reason reason)
> > {
> > sk_drops_add(sk, skb);
> > - __kfree_skb(skb);
> > + /* why __kfree_skb() used here before, other than kfree_skb()?
> > + * confusing......
>
> Do not add comments like that if you do not know the difference...
>
> __kfree_skb() is used by TCP stack because it owns skb in receive
> queues, and avoids touching skb->users
> because it must be one already.
>
> (We made sure not using skb_get() in TCP)
>
> It seems fine to use kfree_skb() in tcp_drop(), it is hardly fast
> path, and the added cost is pure noise.

I understand now why __kfree_skb() was used, and it seems
this commit is ok (with the comment removed, of course). I'll
keep it as is.

Thanks!
Menglong Dong

2022-02-16 07:22:13

by Menglong Dong

Subject: Re: [PATCH net-next 01/19] net: tcp: introduce tcp_drop_reason()

On Wed, Feb 16, 2022 at 2:47 AM David Ahern <[email protected]> wrote:
>
> On 2/15/22 10:34 AM, Eric Dumazet wrote:
> >> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> >> index af94a6d22a9d..e3811afd1756 100644
> >> --- a/net/ipv4/tcp_input.c
> >> +++ b/net/ipv4/tcp_input.c
> >> @@ -4684,10 +4684,19 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
> >> return res;
> >> }
> >>
> >> -static void tcp_drop(struct sock *sk, struct sk_buff *skb)
> >> +static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
> >> + enum skb_drop_reason reason)
> >> {
> >> sk_drops_add(sk, skb);
> >> - __kfree_skb(skb);
> >> + /* why __kfree_skb() used here before, other than kfree_skb()?
> >> + * confusing......
> >
> > Do not add comments like that if you do not know the difference...
> >
> > __kfree_skb() is used by TCP stack because it owns skb in receive
> > queues, and avoids touching skb->users
> > because it must be one already.
>
> and it bypasses kfree_skb tracepoint which seems by design.

Do you mean it shouldn't be traced here?
According to my understanding, __kfree_skb() was used in the
beginning because skb->users isn't touched by TCP. Later,
tcp_drop() was introduced to record the drop count on the socket.

Considering the skb is indeed dropped and no other event is triggered,
is it ok to trigger the kfree_skb tracepoint?

Thanks!
Menglong Dong

2022-02-16 07:29:48

by Menglong Dong

Subject: [PATCH net-next 03/19] net: tcp: use kfree_skb_reason() for tcp_v6_rcv()

From: Menglong Dong <[email protected]>

Replace kfree_skb() used in tcp_v6_rcv() with kfree_skb_reason().
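
A minimal userspace sketch of the conversion pattern used here
(illustrative only, not kernel code): initialise the reason to "not
specified" at function entry, refine it on each failure branch, and let
the single discard label report whatever was recorded.

#include <stdio.h>
#include <stdlib.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_PKT_TOO_SMALL,
	DROP_NO_SOCKET,
};

static int rcv(char *pkt, unsigned int len, int have_socket)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (len < 20) {
		reason = DROP_PKT_TOO_SMALL;
		goto discard_it;
	}
	if (!have_socket) {
		reason = DROP_NO_SOCKET;
		goto discard_it;
	}
	free(pkt);
	return 0;			/* delivered (consumed) */

discard_it:
	printf("dropped, reason=%d\n", reason);
	free(pkt);
	return 0;
}

int main(void)
{
	rcv(malloc(64), 10, 1);		/* dropped: PKT_TOO_SMALL */
	return 0;
}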

Signed-off-by: Menglong Dong <[email protected]>
---
net/ipv6/tcp_ipv6.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0c648bf07f39..402ffbacc371 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1627,6 +1627,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason drop_reason;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1636,6 +1637,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int ret;
struct net *net = dev_net(skb->dev);

+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;

@@ -1649,8 +1651,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)

th = (const struct tcphdr *)skb->data;

- if (unlikely(th->doff < sizeof(struct tcphdr)/4))
+ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
goto bad_packet;
+ }
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;

@@ -1706,6 +1710,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -1741,14 +1747,18 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
}
}

- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }

if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;

- if (tcp_filter(sk, skb))
+ if (tcp_filter(sk, skb)) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
+ }
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
@@ -1779,13 +1789,17 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
return ret ? -1 : 0;

no_tcp_socket:
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_NO_SOCKET;
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_it;
+ }

tcp_v6_fill_cb(skb, hdr, th);

if (tcp_checksum_complete(skb)) {
csum_error:
+ drop_reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1795,7 +1809,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
}

discard_it:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;

discard_and_relse:
@@ -1806,6 +1820,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)

do_time_wait:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
--
2.34.1