From: Menglong Dong <[email protected]>
In the commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()"),
we added the support of reporting the reasons of skb drops to kfree_skb
tracepoint. And in this series patches, reasons for skb drops are added
to ICMP protocol.
In order to report the reasons of skb drops in 'sock_queue_rcv_skb()',
the function 'sock_queue_rcv_skb_reason()' is introduced in the 1th
patch, which is used in the 2th patch.
In the 2th patch, we introduce the new function __ping_queue_rcv_skb()
to report drop reasons by its return value.
In the 3th patch, we make ICMP message handler functions return drop
reasons, which means we change the return type of 'handler()' in
'struct icmp_control' from 'bool' to 'enum skb_drop_reason'. This
changed its original intention, as 'false' means failure, but
'SKB_NOT_DROPPED_YET', which is 0, means success now. Therefore, we
have to change all usages of these handler. Following "handler" functions
are involved:
icmp_unreach()
icmp_redirect()
icmp_echo()
icmp_timestamp()
icmp_discard()
And following drop reasons are added(what they mean can be see
in the document for them):
SKB_DROP_REASON_ICMP_CSUM
SKB_DROP_REASON_ICMP_TYPE
SKB_DROP_REASON_ICMP_BROADCAST
Changes since v2:
- fix aliegnment problem in the 2th patch
Changes since v1:
- introduce __ping_queue_rcv_skb() instead of change the return value
of ping_queue_rcv_skb() in the 2th patch, as Paolo suggested
Menglong Dong (3):
net: sock: introduce sock_queue_rcv_skb_reason()
net: icmp: introduce __ping_queue_rcv_skb() to report drop reasons
net: icmp: add reasons of the skb drops to icmp protocol
include/linux/skbuff.h | 5 +++
include/net/ping.h | 2 +-
include/net/sock.h | 9 ++++-
include/trace/events/skb.h | 3 ++
net/core/sock.c | 30 ++++++++++++---
net/ipv4/icmp.c | 75 ++++++++++++++++++++++----------------
net/ipv4/ping.c | 32 ++++++++++------
net/ipv6/icmp.c | 24 +++++++-----
8 files changed, 121 insertions(+), 59 deletions(-)
--
2.35.1
From: Menglong Dong <[email protected]>
Replace kfree_skb() used in icmp_rcv() and icmpv6_rcv() with
kfree_skb_reason().
In order to get the reasons of the skb drops after icmp message handle,
we change the return type of 'handler()' in 'struct icmp_control' from
'bool' to 'enum skb_drop_reason'. This may change its original
intention, as 'false' means failure, but 'SKB_NOT_DROPPED_YET' means
success now. Therefore, all 'handler' and the call of them need to be
handled. Following 'handler' functions are involved:
icmp_unreach()
icmp_redirect()
icmp_echo()
icmp_timestamp()
icmp_discard()
And following new drop reasons are added:
SKB_DROP_REASON_ICMP_CSUM
SKB_DROP_REASON_ICMP_TYPE
SKB_DROP_REASON_ICMP_BROADCAST
Reviewed-by: Hao Peng <[email protected]>
Reviewed-by: Jiang Biao <[email protected]>
Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 5 +++
include/net/ping.h | 2 +-
include/trace/events/skb.h | 3 ++
net/ipv4/icmp.c | 75 ++++++++++++++++++++++----------------
net/ipv4/ping.c | 14 ++++---
net/ipv6/icmp.c | 24 +++++++-----
6 files changed, 76 insertions(+), 47 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 26538ceb4b01..18c678b340d3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -444,6 +444,11 @@ enum skb_drop_reason {
SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented
* at tun/tap, e.g., check_filter()
*/
+ SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */
+ SKB_DROP_REASON_ICMP_TYPE, /* unknown ICMP type */
+ SKB_DROP_REASON_ICMP_BROADCAST, /* unacceptable broadcast(multicast)
+ * ICMP message
+ */
SKB_DROP_REASON_MAX,
};
diff --git a/include/net/ping.h b/include/net/ping.h
index 2fe78874318c..b68fbfdb606f 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -76,7 +76,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
void *user_icmph, size_t icmph_len);
int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-bool ping_rcv(struct sk_buff *skb);
+enum skb_drop_reason ping_rcv(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index e1670e1e4934..70d0dac8e08b 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -61,6 +61,9 @@
EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \
EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \
EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \
+ EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \
+ EM(SKB_DROP_REASON_ICMP_TYPE, ICMP_TYPE) \
+ EM(SKB_DROP_REASON_ICMP_BROADCAST, ICMP_BROADCAST) \
EMe(SKB_DROP_REASON_MAX, MAX)
#undef EM
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72a375c7f417..97e53f86b14b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert);
*/
struct icmp_control {
- bool (*handler)(struct sk_buff *skb);
+ enum skb_drop_reason (*handler)(struct sk_buff *skb);
short error; /* This ICMP is classed as an error message */
};
@@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto)
* ICMP_PARAMETERPROB.
*/
-static bool icmp_unreach(struct sk_buff *skb)
+static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
@@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmph = icmp_hdr(skb);
iph = (const struct iphdr *)skb->data;
- if (iph->ihl < 5) /* Mangled header, drop. */
+ if (iph->ihl < 5) { /* Mangled header, drop. */
+ reason = SKB_DROP_REASON_IP_INHDR;
goto out_err;
+ }
switch (icmph->type) {
case ICMP_DEST_UNREACH:
@@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmp_socket_deliver(skb, info);
out:
- return true;
+ return reason;
out_err:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return false;
+ return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
}
@@ -952,20 +955,20 @@ static bool icmp_unreach(struct sk_buff *skb)
* Handle ICMP_REDIRECT.
*/
-static bool icmp_redirect(struct sk_buff *skb)
+static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
/* there aught to be a stat */
- return false;
+ return SKB_DROP_REASON_NOMEM;
}
icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb)
* See also WRT handling of options once they are done and working.
*/
-static bool icmp_echo(struct sk_buff *skb)
+static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
struct net *net;
@@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
if (net->ipv4.sysctl_icmp_echo_ignore_all)
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
@@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb)
if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/* Helper for icmp_echo and icmpv6_echo_reply.
@@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe);
* MUST be accurate to a few minutes.
* MUST be updated at least at 15Hz.
*/
-static bool icmp_timestamp(struct sk_buff *skb)
+static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
/*
@@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb)
icmp_param.data_len = 0;
icmp_param.head_len = sizeof(struct icmphdr) + 12;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
out_err:
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
-static bool icmp_discard(struct sk_buff *skb)
+static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
{
/* pretend it was a success */
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb)
*/
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph;
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
- bool success;
+ struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
goto drop;
@@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*icmph));
- if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
skb_set_network_header(skb, nh);
}
@@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb)
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
*/
- success = icmp_echo(skb);
- goto success_check;
+ reason = icmp_echo(skb);
+ goto reason_check;
}
if (icmph->type == ICMP_EXT_ECHOREPLY) {
- success = ping_rcv(skb);
- goto success_check;
+ reason = ping_rcv(skb);
+ goto reason_check;
}
/*
@@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb)
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
- if (icmph->type > NR_ICMP_TYPES)
+ if (icmph->type > NR_ICMP_TYPES) {
+ reason = SKB_DROP_REASON_ICMP_TYPE;
goto error;
+ }
/*
* Parse the ICMP message
@@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb)
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ reason = SKB_DROP_REASON_ICMP_BROADCAST;
goto error;
}
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
+ reason = SKB_DROP_REASON_ICMP_BROADCAST;
goto error;
}
}
- success = icmp_pointers[icmph->type].handler(skb);
-success_check:
- if (success) {
+ reason = icmp_pointers[icmph->type].handler(skb);
+reason_check:
+ if (!reason) {
consume_skb(skb);
return NET_RX_SUCCESS;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9a1ea6c263f8..4137e5808107 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -960,12 +960,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
* All we need to do is get the socket.
*/
-bool ping_rcv(struct sk_buff *skb)
+enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
- bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -980,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2 && !ping_queue_rcv_skb(sk, skb2))
- rc = true;
+ if (skb2)
+ reason = __ping_queue_rcv_skb(sk, skb2);
+ else
+ reason = SKB_DROP_REASON_NOMEM;
sock_put(sk);
}
- if (!rc)
+ if (reason)
pr_debug("no socket, dropping\n");
- return rc;
+ return reason;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e6b978ea0e87..01c8003c9fc9 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -864,21 +864,23 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
static int icmpv6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct net *net = dev_net(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
u8 type;
- bool success = false;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
@@ -886,8 +888,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*hdr));
- if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
skb_set_network_header(skb, nh);
}
@@ -924,11 +929,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_EXT_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -994,19 +999,20 @@ static int icmpv6_rcv(struct sk_buff *skb)
/* until the v6 path can be better sorted assume failure and
* preserve the status quo behaviour for the rest of the paths to here
*/
- if (success)
- consume_skb(skb);
+ if (reason)
+ kfree_skb_reason(skb, reason);
else
- kfree_skb(skb);
+ consume_skb(skb);
return 0;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
--
2.35.1
On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
> >
> > I guess this set raises the follow up question to Dave if adding
> > drop reasons to places with MIB exception stats means improving
> > the granularity or one MIB stat == one reason?
>
> There are a few examples where multiple MIB stats are bumped on a drop,
> but the reason code should always be set based on first failure. Did you
> mean something else with your question?
I meant whether we want to differentiate between TYPE, and BROADCAST or
whatever other possible invalid protocol cases we can get here or just
dump them all into a single protocol error code.
From: Menglong Dong <[email protected]>
In order to report the reasons of skb drops in 'sock_queue_rcv_skb()',
introduce the function 'sock_queue_rcv_skb_reason()'.
As the return value of 'sock_queue_rcv_skb()' is used as the error code,
we can't make it as drop reason and have to pass extra output argument.
'sock_queue_rcv_skb()' is used in many places, so we can't change it
directly.
Introduce the new function 'sock_queue_rcv_skb_reason()' and make
'sock_queue_rcv_skb()' an inline call to it.
Reviewed-by: Hao Peng <[email protected]>
Reviewed-by: Jiang Biao <[email protected]>
Signed-off-by: Menglong Dong <[email protected]>
---
include/net/sock.h | 9 ++++++++-
net/core/sock.c | 30 ++++++++++++++++++++++++------
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index c4b91fc19b9c..1a988e605f09 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2392,7 +2392,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
void (*destructor)(struct sock *sk,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);
+
+static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return sock_queue_rcv_skb_reason(sk, skb, NULL);
+}
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 1180a0cb0110..2cae991f817e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -503,17 +503,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
+ enum skb_drop_reason drop_reason;
int err;
err = sk_filter(sk, skb);
- if (err)
- return err;
-
- return __sock_queue_rcv_skb(sk, skb);
+ if (err) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ goto out;
+ }
+ err = __sock_queue_rcv_skb(sk, skb);
+ switch (err) {
+ case -ENOMEM:
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ break;
+ case -ENOBUFS:
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ break;
+ default:
+ drop_reason = SKB_NOT_DROPPED_YET;
+ break;
+ }
+out:
+ if (reason)
+ *reason = drop_reason;
+ return err;
}
-EXPORT_SYMBOL(sock_queue_rcv_skb);
+EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
--
2.35.1
On 3/16/22 9:18 PM, Jakub Kicinski wrote:
>
> I guess this set raises the follow up question to Dave if adding
> drop reasons to places with MIB exception stats means improving
> the granularity or one MIB stat == one reason?
>
There are a few examples where multiple MIB stats are bumped on a drop,
but the reason code should always be set based on first failure. Did you
mean something else with your question?
On Wed, 16 Mar 2022 14:31:48 +0800 [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> Replace kfree_skb() used in icmp_rcv() and icmpv6_rcv() with
> kfree_skb_reason().
>
> In order to get the reasons of the skb drops after icmp message handle,
> we change the return type of 'handler()' in 'struct icmp_control' from
> 'bool' to 'enum skb_drop_reason'. This may change its original
> intention, as 'false' means failure, but 'SKB_NOT_DROPPED_YET' means
> success now. Therefore, all 'handler' and the call of them need to be
> handled. Following 'handler' functions are involved:
>
> icmp_unreach()
> icmp_redirect()
> icmp_echo()
> icmp_timestamp()
> icmp_discard()
>
> And following new drop reasons are added:
>
> SKB_DROP_REASON_ICMP_CSUM
> SKB_DROP_REASON_ICMP_TYPE
> SKB_DROP_REASON_ICMP_BROADCAST
>
> Reviewed-by: Hao Peng <[email protected]>
> Reviewed-by: Jiang Biao <[email protected]>
> Signed-off-by: Menglong Dong <[email protected]>
I guess this set raises the follow up question to Dave if adding
drop reasons to places with MIB exception stats means improving
the granularity or one MIB stat == one reason?
> -bool ping_rcv(struct sk_buff *skb)
> +enum skb_drop_reason ping_rcv(struct sk_buff *skb)
> {
> + enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
> struct sock *sk;
> struct net *net = dev_net(skb->dev);
> struct icmphdr *icmph = icmp_hdr(skb);
> - bool rc = false;
>
> /* We assume the packet has already been checked by icmp_rcv */
>
> @@ -980,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb)
> struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
>
> pr_debug("rcv on socket %p\n", sk);
> - if (skb2 && !ping_queue_rcv_skb(sk, skb2))
> - rc = true;
> + if (skb2)
> + reason = __ping_queue_rcv_skb(sk, skb2);
> + else
> + reason = SKB_DROP_REASON_NOMEM;
> sock_put(sk);
> }
>
> - if (!rc)
> + if (reason)
> pr_debug("no socket, dropping\n");
This is going to be printed on memory allocation failures now as well.
On Thu, Mar 17, 2022 at 11:18 AM Jakub Kicinski <[email protected]> wrote:
>
[......]
> > -bool ping_rcv(struct sk_buff *skb)
> > +enum skb_drop_reason ping_rcv(struct sk_buff *skb)
> > {
> > + enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
> > struct sock *sk;
> > struct net *net = dev_net(skb->dev);
> > struct icmphdr *icmph = icmp_hdr(skb);
> > - bool rc = false;
> >
> > /* We assume the packet has already been checked by icmp_rcv */
> >
> > @@ -980,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb)
> > struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
> >
> > pr_debug("rcv on socket %p\n", sk);
> > - if (skb2 && !ping_queue_rcv_skb(sk, skb2))
> > - rc = true;
> > + if (skb2)
> > + reason = __ping_queue_rcv_skb(sk, skb2);
> > + else
> > + reason = SKB_DROP_REASON_NOMEM;
> > sock_put(sk);
> > }
> >
> > - if (!rc)
> > + if (reason)
> > pr_debug("no socket, dropping\n");
>
> This is going to be printed on memory allocation failures now as well.
Enn...This logic is not changed. In the previous, skb2==NULL means
rc is false, and this message is printed too.
Seems this can be optimized by the way? Printing the message only for
no socket found.
On Thu, Mar 17, 2022 at 12:05 PM Jakub Kicinski <[email protected]> wrote:
>
> On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
> > On 3/16/22 9:18 PM, Jakub Kicinski wrote:
> > >
> > > I guess this set raises the follow up question to Dave if adding
> > > drop reasons to places with MIB exception stats means improving
> > > the granularity or one MIB stat == one reason?
> >
> > There are a few examples where multiple MIB stats are bumped on a drop,
> > but the reason code should always be set based on first failure. Did you
> > mean something else with your question?
>
> I meant whether we want to differentiate between TYPE, and BROADCAST or
> whatever other possible invalid protocol cases we can get here or just
> dump them all into a single protocol error code.
Such as a SKB_DROP_REASON_PROTO_NOTSUPPORTED? and apply
it to 'GRE_VERSION' such cases too? Which means the data is not supported
by current protocol.
On 3/16/22 10:05 PM, Jakub Kicinski wrote:
> On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
>> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
>>>
>>> I guess this set raises the follow up question to Dave if adding
>>> drop reasons to places with MIB exception stats means improving
>>> the granularity or one MIB stat == one reason?
>>
>> There are a few examples where multiple MIB stats are bumped on a drop,
>> but the reason code should always be set based on first failure. Did you
>> mean something else with your question?
>
> I meant whether we want to differentiate between TYPE, and BROADCAST or
> whatever other possible invalid protocol cases we can get here or just
> dump them all into a single protocol error code.
I think a single one is a good starting point.
From: David Ahern
> Sent: 17 March 2022 14:49
>
> On 3/16/22 10:05 PM, Jakub Kicinski wrote:
> > On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
> >> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
> >>>
> >>> I guess this set raises the follow up question to Dave if adding
> >>> drop reasons to places with MIB exception stats means improving
> >>> the granularity or one MIB stat == one reason?
> >>
> >> There are a few examples where multiple MIB stats are bumped on a drop,
> >> but the reason code should always be set based on first failure. Did you
> >> mean something else with your question?
> >
> > I meant whether we want to differentiate between TYPE, and BROADCAST or
> > whatever other possible invalid protocol cases we can get here or just
> > dump them all into a single protocol error code.
>
> I think a single one is a good starting point.
I remember looking at (I think) the packet drop stats a while back.
Two machines on the same LAN were reporting rather different values.
Basically 0 v quite a few.
It turned out that passing the packets to dhcp was deemed enough
to stop them being reported as 'dropped'.
And I think that version of dhcp fed every packed into its BPF? filter.
(I never did decide whether that caused every skb to be duplicated.)
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On 3/17/22 8:53 AM, David Laight wrote:
> From: David Ahern
>> Sent: 17 March 2022 14:49
>>
>> On 3/16/22 10:05 PM, Jakub Kicinski wrote:
>>> On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
>>>> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
>>>>>
>>>>> I guess this set raises the follow up question to Dave if adding
>>>>> drop reasons to places with MIB exception stats means improving
>>>>> the granularity or one MIB stat == one reason?
>>>>
>>>> There are a few examples where multiple MIB stats are bumped on a drop,
>>>> but the reason code should always be set based on first failure. Did you
>>>> mean something else with your question?
>>>
>>> I meant whether we want to differentiate between TYPE, and BROADCAST or
>>> whatever other possible invalid protocol cases we can get here or just
>>> dump them all into a single protocol error code.
>>
>> I think a single one is a good starting point.
>
> I remember looking at (I think) the packet drop stats a while back.
> Two machines on the same LAN were reporting rather different values.
> Basically 0 v quite a few.
>
> It turned out that passing the packets to dhcp was deemed enough
> to stop them being reported as 'dropped'.
> And I think that version of dhcp fed every packed into its BPF? filter.
> (I never did decide whether that caused every skb to be duplicated.)
>
I believe it depends on the type of socket. Packet sockets - e.g.,
running lldpd or tcpdump - do cause every packet to be cloned and kills
performance.
On Thu, Mar 17, 2022 at 10:48 PM David Ahern <[email protected]> wrote:
>
> On 3/16/22 10:05 PM, Jakub Kicinski wrote:
> > On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
> >> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
> >>>
> >>> I guess this set raises the follow up question to Dave if adding
> >>> drop reasons to places with MIB exception stats means improving
> >>> the granularity or one MIB stat == one reason?
> >>
> >> There are a few examples where multiple MIB stats are bumped on a drop,
> >> but the reason code should always be set based on first failure. Did you
> >> mean something else with your question?
> >
> > I meant whether we want to differentiate between TYPE, and BROADCAST or
> > whatever other possible invalid protocol cases we can get here or just
> > dump them all into a single protocol error code.
>
> I think a single one is a good starting point.
Ok, I'll try my best to make a V4 base this way...Is there any inspiration?
Such as we make SKB_DROP_REASON_PTYPE_ABSENT to
SKB_DROP_REASON_L2_PROTO, which means the L2 protocol is not
supported or invalied.
And use SKB_DROP_REASON_L4_PROTO for the L4 protocol problem,
such as GRE version not supported, ICMP type not supported, etc.
Sounds nice, isn't it?
On Fri, Mar 18, 2022 at 12:10 PM David Ahern <[email protected]> wrote:
>
> On 3/17/22 7:37 PM, Menglong Dong wrote:
> > On Thu, Mar 17, 2022 at 10:48 PM David Ahern <[email protected]> wrote:
> >>
> >> On 3/16/22 10:05 PM, Jakub Kicinski wrote:
> >>> On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
> >>>> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
> >>>>>
> >>>>> I guess this set raises the follow up question to Dave if adding
> >>>>> drop reasons to places with MIB exception stats means improving
> >>>>> the granularity or one MIB stat == one reason?
> >>>>
> >>>> There are a few examples where multiple MIB stats are bumped on a drop,
> >>>> but the reason code should always be set based on first failure. Did you
> >>>> mean something else with your question?
> >>>
> >>> I meant whether we want to differentiate between TYPE, and BROADCAST or
> >>> whatever other possible invalid protocol cases we can get here or just
> >>> dump them all into a single protocol error code.
> >>
> >> I think a single one is a good starting point.
> >
> > Ok, I'll try my best to make a V4 base this way...Is there any inspiration?
> >
> > Such as we make SKB_DROP_REASON_PTYPE_ABSENT to
> > SKB_DROP_REASON_L2_PROTO, which means the L2 protocol is not
> > supported or invalied.
>
> not following. PTYPE is a Linux name. That means nothing to a user.
>
> I am not sure where you want to use L2_PROTO.
Yeah, PTYPE seems not suitable. I mean that replace SKB_DROP_REASON_PTYPE_ABSENT
that is used in __netif_receive_skb_core() with L3_PROTO, which means no L3
protocol handler (or other device handler) is not found for the
packet. This seems more
friendly and not code based.
>
> >
> > And use SKB_DROP_REASON_L4_PROTO for the L4 protocol problem,
> > such as GRE version not supported, ICMP type not supported, etc.
Is this L4_PROTO followed by anyone?
Thanks!
Menglong Dong
> >
> > Sounds nice, isn't it?
>
On 3/17/22 7:37 PM, Menglong Dong wrote:
> On Thu, Mar 17, 2022 at 10:48 PM David Ahern <[email protected]> wrote:
>>
>> On 3/16/22 10:05 PM, Jakub Kicinski wrote:
>>> On Wed, 16 Mar 2022 21:35:47 -0600 David Ahern wrote:
>>>> On 3/16/22 9:18 PM, Jakub Kicinski wrote:
>>>>>
>>>>> I guess this set raises the follow up question to Dave if adding
>>>>> drop reasons to places with MIB exception stats means improving
>>>>> the granularity or one MIB stat == one reason?
>>>>
>>>> There are a few examples where multiple MIB stats are bumped on a drop,
>>>> but the reason code should always be set based on first failure. Did you
>>>> mean something else with your question?
>>>
>>> I meant whether we want to differentiate between TYPE, and BROADCAST or
>>> whatever other possible invalid protocol cases we can get here or just
>>> dump them all into a single protocol error code.
>>
>> I think a single one is a good starting point.
>
> Ok, I'll try my best to make a V4 base this way...Is there any inspiration?
>
> Such as we make SKB_DROP_REASON_PTYPE_ABSENT to
> SKB_DROP_REASON_L2_PROTO, which means the L2 protocol is not
> supported or invalied.
not following. PTYPE is a Linux name. That means nothing to a user.
I am not sure where you want to use L2_PROTO.
>
> And use SKB_DROP_REASON_L4_PROTO for the L4 protocol problem,
> such as GRE version not supported, ICMP type not supported, etc.
>
> Sounds nice, isn't it?
On Sat, Mar 19, 2022 at 6:33 AM David Ahern <[email protected]> wrote:
>
> On 3/18/22 1:26 AM, Menglong Dong wrote:
> > Yeah, PTYPE seems not suitable. I mean that replace SKB_DROP_REASON_PTYPE_ABSENT
> > that is used in __netif_receive_skb_core() with L3_PROTO, which means no L3
> > protocol handler (or other device handler) is not found for the
> > packet. This seems more
> > friendly and not code based.
> >
> >>> And use SKB_DROP_REASON_L4_PROTO for the L4 protocol problem,
> >>> such as GRE version not supported, ICMP type not supported, etc.
> > Is this L4_PROTO followed by anyone?
>
> how about just a generic
> SKB_DROP_REASON_UNHANDLED_PROTO /* protocol not implemented
> * or not supported
> */
>
> in place of current PTYPE_ABSENT (so a rename to remove a Linux code
> reference), and then use it for no L3 protocol handler, no L4 protocol
> handler, version extensions etc. The instruction pointer to symbol gives
> the context of the unsupported protocol.
Yeah, I think it's a good idea :)
Thanks!
Menglong Dong
On 3/18/22 1:26 AM, Menglong Dong wrote:
> Yeah, PTYPE seems not suitable. I mean that replace SKB_DROP_REASON_PTYPE_ABSENT
> that is used in __netif_receive_skb_core() with L3_PROTO, which means no L3
> protocol handler (or other device handler) is not found for the
> packet. This seems more
> friendly and not code based.
>
>>> And use SKB_DROP_REASON_L4_PROTO for the L4 protocol problem,
>>> such as GRE version not supported, ICMP type not supported, etc.
> Is this L4_PROTO followed by anyone?
how about just a generic
SKB_DROP_REASON_UNHANDLED_PROTO /* protocol not implemented
* or not supported
*/
in place of current PTYPE_ABSENT (so a rename to remove a Linux code
reference), and then use it for no L3 protocol handler, no L4 protocol
handler, version extensions etc. The instruction pointer to symbol gives
the context of the unsupported protocol.