From: Menglong Dong <[email protected]>
snmp is the network package statistics module in kernel, and it is
useful in network issue diagnosis, such as packet drop.
However, it is hard to get the detail information about the packet.
For example, we can know that there is something wrong with the
checksum of udp packet though 'InCsumErrors' of UDP protocol in
/proc/net/snmp, but we can't figure out the ip and port of the packet
that this error is happening on.
Add tracepoint for snmp. Therefor, users can use some tools (such as
eBPF) to get the information of the exceptional packet.
In the first patch, the frame of snmp-tracepoint is created. And in
the second patch, tracepoint for udp-snmp is introduced.
Changes since v1:
- use a single trace event for all statistics type, and special
statistics can be filter by type (procotol) and field.
Now, it will looks like this for udp statistics:
$ cat trace
$ tracer: nop
$
$ entries-in-buffer/entries-written: 4/4 #P:1
$
$ _-----=> irqs-off
$ / _----=> need-resched
$ | / _---=> hardirq/softirq
$ || / _--=> preempt-depth
$ ||| / _-=> migrate-disable
$ |||| / delay
$ TASK-PID CPU# ||||| TIMESTAMP FUNCTION
$ | | | ||||| | |
nc-171 [000] ..s1. 35.952997: snmp: skbaddr=(____ptrval____), type=9, field=2, val=1
nc-171 [000] .N... 35.957006: snmp: skbaddr=(____ptrval____), type=9, field=4, val=1
nc-171 [000] ..s1. 35.957822: snmp: skbaddr=(____ptrval____), type=9, field=2, val=1
nc-171 [000] ..... 35.957893: snmp: skbaddr=(____ptrval____), type=9, field=4, val=1
'type=9' means that the event is triggered by udp statistics and 'field=2'
means that this is triggered by 'NoPorts'. 'val=1' means that increases
of statistics (decrease can happen on tcp).
Menglong Dong (2):
net: snmp: add tracepoint support for snmp
net: snmp: add snmp tracepoint support for udp
include/net/udp.h | 25 ++++++++++++++++-----
include/trace/events/snmp.h | 44 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/snmp.h | 21 ++++++++++++++++++
net/core/net-traces.c | 3 +++
net/ipv4/udp.c | 28 +++++++++++++----------
5 files changed, 104 insertions(+), 17 deletions(-)
create mode 100644 include/trace/events/snmp.h
--
2.27.0
From: Menglong Dong <[email protected]>
snmp is the network package statistics module in kernel, and it is
useful in network issue diagnosis, such as packet drop.
However, it is hard to get the detail information about the packet.
For example, we can know that there is something wrong with the
checksum of udp packet though 'InCsumErrors' of UDP protocol in
/proc/net/snmp, but we can't figure out the ip and port of the packet
that this error is happening on.
Add tracepoint for snmp. Therefor, users can use some tools (such as
eBPF) to get the information of the exceptional packet.
Signed-off-by: Menglong Dong <[email protected]>
v2:
- use a single event, instead of creating events for every protocols
---
include/trace/events/snmp.h | 44 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/snmp.h | 21 ++++++++++++++++++
net/core/net-traces.c | 3 +++
3 files changed, 68 insertions(+)
create mode 100644 include/trace/events/snmp.h
diff --git a/include/trace/events/snmp.h b/include/trace/events/snmp.h
new file mode 100644
index 000000000000..1fa2e31056e0
--- /dev/null
+++ b/include/trace/events/snmp.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM snmp
+
+#if !defined(_TRACE_SNMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SNMP_H
+
+#include <linux/tracepoint.h>
+#include <linux/skbuff.h>
+#include <linux/snmp.h>
+
+DECLARE_EVENT_CLASS(snmp_template,
+
+ TP_PROTO(struct sk_buff *skb, int type, int field, int val),
+
+ TP_ARGS(skb, type, field, val),
+
+ TP_STRUCT__entry(
+ __field(void *, skbaddr)
+ __field(int, type)
+ __field(int, field)
+ __field(int, val)
+ ),
+
+ TP_fast_assign(
+ __entry->skbaddr = skb;
+ __entry->type = type;
+ __entry->field = field;
+ __entry->val = val;
+ ),
+
+ TP_printk("skbaddr=%p, type=%d, field=%d, val=%d",
+ __entry->skbaddr, __entry->type,
+ __entry->field, __entry->val)
+);
+
+DEFINE_EVENT(snmp_template, snmp,
+ TP_PROTO(struct sk_buff *skb, int type, int field, int val),
+ TP_ARGS(skb, type, field, val)
+);
+
+#endif
+
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 904909d020e2..b96077e09a58 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -347,4 +347,25 @@ enum
__LINUX_MIB_TLSMAX
};
+/* mib type definitions for trace event */
+enum {
+ TRACE_MIB_NUM = 0,
+ TRACE_MIB_IP,
+ TRACE_MIB_IPV6,
+ TRACE_MIB_TCP,
+ TRACE_MIB_NET,
+ TRACE_MIB_ICMP,
+ TRACE_MIB_ICMPV6,
+ TRACE_MIB_ICMPMSG,
+ TRACE_MIB_ICMPV6MSG,
+ TRACE_MIB_UDP,
+ TRACE_MIB_UDPV6,
+ TRACE_MIB_UDPLITE,
+ TRACE_MIB_UDPV6LITE,
+ TRACE_MIB_XFRM,
+ TRACE_MIB_TLS,
+ TRACE_MIB_MPTCP,
+ __TRACE_MIB_MAX
+};
+
#endif /* _LINUX_SNMP_H */
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c40cd8dd75c7..e291c0974438 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -35,6 +35,7 @@
#include <trace/events/tcp.h>
#include <trace/events/fib.h>
#include <trace/events/qdisc.h>
+#include <trace/events/snmp.h>
#if IS_ENABLED(CONFIG_BRIDGE)
#include <trace/events/bridge.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
@@ -61,3 +62,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(snmp);
--
2.27.0
From: Menglong Dong <[email protected]>
Add snmp tracepoint support for udp.
Signed-off-by: Menglong Dong <[email protected]>
---
include/net/udp.h | 25 +++++++++++++++++++------
net/ipv4/udp.c | 28 +++++++++++++++++-----------
2 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index f1c2a88c9005..6cf11e0d75b0 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -28,6 +28,7 @@
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/indirect_call_wrapper.h>
+#include <trace/events/snmp.h>
/**
* struct udp_skb_cb - UDP(-Lite) private variables
@@ -384,12 +385,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
/*
* SNMP statistics for UDP and UDP-Lite
*/
-#define UDP_INC_STATS(net, field, is_udplite) do { \
- if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
- else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
-#define __UDP_INC_STATS(net, field, is_udplite) do { \
- if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
- else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
+#define UDP_INC_STATS(net, field, is_udplite) do { \
+ if (is_udplite) { \
+ SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
+ trace_snmp(skb, TRACE_MIB_UDPLITE, field, 1); \
+ } else { \
+ SNMP_INC_STATS((net)->mib.udp_statistics, field); \
+ trace_snmp(skb, TRACE_MIB_UDP, field, 1); \
+ } \
+} while (0)
+#define __UDP_INC_STATS(net, skb, field, is_udplite) do { \
+ if (is_udplite) { \
+ __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
+ trace_snmp(skb, TRACE_MIB_UDPLITE, field, 1); \
+ } else { \
+ __SNMP_INC_STATS((net)->mib.udp_statistics, field); \
+ trace_snmp(skb, TRACE_MIB_UDP, field, 1); \
+ } \
+} while (0)
#define __UDP6_INC_STATS(net, field, is_udplite) do { \
if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7101e6d892d6..66ae9eea65c7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1648,9 +1648,11 @@ static struct sk_buff *__first_packet_length(struct sock *sk,
while ((skb = skb_peek(rcvq)) != NULL) {
if (udp_lib_checksum_complete(skb)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+ __UDP_INC_STATS(sock_net(sk), skb,
+ UDP_MIB_CSUMERRORS,
IS_UDPLITE(sk));
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ __UDP_INC_STATS(sock_net(sk), skb,
+ UDP_MIB_INERRORS,
IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
@@ -2143,7 +2145,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
ret = encap_rcv(sk, skb);
if (ret <= 0) {
- __UDP_INC_STATS(sock_net(sk),
+ __UDP_INC_STATS(sock_net(sk), skb,
UDP_MIB_INDATAGRAMS,
is_udplite);
return -ret;
@@ -2201,9 +2203,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
return __udp_queue_rcv_skb(sk, skb);
csum_error:
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+ __UDP_INC_STATS(sock_net(sk), skb, UDP_MIB_CSUMERRORS,
+ is_udplite);
drop:
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ __UDP_INC_STATS(sock_net(sk), skb, UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
@@ -2290,9 +2293,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
if (unlikely(!nskb)) {
atomic_inc(&sk->sk_drops);
- __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
+ __UDP_INC_STATS(net, skb, UDP_MIB_RCVBUFERRORS,
IS_UDPLITE(sk));
- __UDP_INC_STATS(net, UDP_MIB_INERRORS,
+ __UDP_INC_STATS(net, skb, UDP_MIB_INERRORS,
IS_UDPLITE(sk));
continue;
}
@@ -2311,7 +2314,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
consume_skb(skb);
} else {
kfree_skb(skb);
- __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
+ __UDP_INC_STATS(net, skb, UDP_MIB_IGNOREDMULTI,
proto == IPPROTO_UDPLITE);
}
return 0;
@@ -2454,7 +2457,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb))
goto csum_error;
- __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, skb, UDP_MIB_NOPORTS,
+ proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -2481,9 +2485,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
ulen);
- __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, skb, UDP_MIB_CSUMERRORS,
+ proto == IPPROTO_UDPLITE);
drop:
- __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ __UDP_INC_STATS(net, skb, UDP_MIB_INERRORS,
+ proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
--
2.27.0
On Thu, 18 Nov 2021 20:48:10 +0800
[email protected] wrote:
> nc-171 [000] ..s1. 35.952997: snmp: skbaddr=(____ptrval____), type=9, field=2, val=1
> nc-171 [000] .N... 35.957006: snmp: skbaddr=(____ptrval____), type=9, field=4, val=1
> nc-171 [000] ..s1. 35.957822: snmp: skbaddr=(____ptrval____), type=9, field=2, val=1
> nc-171 [000] ..... 35.957893: snmp: skbaddr=(____ptrval____), type=9, field=4, val=1
>
> 'type=9' means that the event is triggered by udp statistics and 'field=2'
> means that this is triggered by 'NoPorts'. 'val=1' means that increases
> of statistics (decrease can happen on tcp).
Instead of printing numbers, why not print the meanings?
I'll reply to the other patches to explain that.
-- Steve
On 11/18/21 5:48 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> snmp is the network package statistics module in kernel, and it is
> useful in network issue diagnosis, such as packet drop.
>
> However, it is hard to get the detail information about the packet.
> For example, we can know that there is something wrong with the
> checksum of udp packet though 'InCsumErrors' of UDP protocol in
> /proc/net/snmp, but we can't figure out the ip and port of the packet
> that this error is happening on.
>
> Add tracepoint for snmp. Therefor, users can use some tools (such as
> eBPF) to get the information of the exceptional packet.
>
> In the first patch, the frame of snmp-tracepoint is created. And in
> the second patch, tracepoint for udp-snmp is introduced.
>
there is already good infrastructure around kfree_skb - e.g., drop watch
monitor. Why not extend that in a way that other drop points can benefit
over time?
e.g., something like this (uncompiled and not tested; and to which
Steven is going to suggest strings for the reason):
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bd6520329f6..e66e634acad0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1075,8 +1075,13 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
+enum skb_drop_reason {
+ SKB_DROP_REASON_NOT_SPECIFIED,
+ SKB_DROP_REASON_CSUM,
+}
void skb_release_head_state(struct sk_buff *skb);
void kfree_skb(struct sk_buff *skb);
+void kfree_skb_with_reason(struct sk_buff *skb, enum skb_drop_reason);
void kfree_skb_list(struct sk_buff *segs);
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
void skb_tx_error(struct sk_buff *skb);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 9e92f22eb086..2a2d263f9d46 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -14,7 +14,7 @@
*/
TRACE_EVENT(kfree_skb,
- TP_PROTO(struct sk_buff *skb, void *location),
+ TP_PROTO(struct sk_buff *skb, void *location, enum
skb_drop_reason reason),
TP_ARGS(skb, location),
@@ -22,16 +22,18 @@ TRACE_EVENT(kfree_skb,
__field( void *, skbaddr )
__field( void *, location )
__field( unsigned short, protocol )
+ __field( unsigned int, reason )
),
TP_fast_assign(
__entry->skbaddr = skb;
__entry->location = location;
__entry->protocol = ntohs(skb->protocol);
+ __entry->reason = reason;
),
- TP_printk("skbaddr=%p protocol=%u location=%p",
- __entry->skbaddr, __entry->protocol, __entry->location)
+ TP_printk("skbaddr=%p protocol=%u location=%p reason %u",
+ __entry->skbaddr, __entry->protocol, __entry->location,
__entry->reason)
);
TRACE_EVENT(consume_skb,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 67a9188d8a49..388059bda3d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -770,11 +770,29 @@ void kfree_skb(struct sk_buff *skb)
if (!skb_unref(skb))
return;
- trace_kfree_skb(skb, __builtin_return_address(0));
+ trace_kfree_skb(skb, __builtin_return_address(0),
SKB_DROP_REASON_NOT_SPECIFIED);
__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb);
+/**
+ * kfree_skb_with_reason - free an sk_buff
+ * @skb: buffer to free
+ * @reason: enum describing why the skb is dropped
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+void kfree_skb_with_reason(struct sk_buff *skb, enum skb_drop_reason
reason);
+{
+ if (!skb_unref(skb))
+ return;
+
+ trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_with_reason);
+
void kfree_skb_list(struct sk_buff *segs)
{
while (segs) {
On Thu, 18 Nov 2021 20:48:11 +0800
[email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> snmp is the network package statistics module in kernel, and it is
> useful in network issue diagnosis, such as packet drop.
>
> However, it is hard to get the detail information about the packet.
> For example, we can know that there is something wrong with the
> checksum of udp packet though 'InCsumErrors' of UDP protocol in
> /proc/net/snmp, but we can't figure out the ip and port of the packet
> that this error is happening on.
>
> Add tracepoint for snmp. Therefor, users can use some tools (such as
> eBPF) to get the information of the exceptional packet.
>
> Signed-off-by: Menglong Dong <[email protected]>
> v2:
> - use a single event, instead of creating events for every protocols
> ---
> include/trace/events/snmp.h | 44 +++++++++++++++++++++++++++++++++++++
> include/uapi/linux/snmp.h | 21 ++++++++++++++++++
> net/core/net-traces.c | 3 +++
> 3 files changed, 68 insertions(+)
> create mode 100644 include/trace/events/snmp.h
>
> diff --git a/include/trace/events/snmp.h b/include/trace/events/snmp.h
> new file mode 100644
> index 000000000000..1fa2e31056e0
> --- /dev/null
> +++ b/include/trace/events/snmp.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM snmp
> +
> +#if !defined(_TRACE_SNMP_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SNMP_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/skbuff.h>
> +#include <linux/snmp.h>
Add:
#define TRACE_MIB_VALUES \
EM(TRACE_MIB_NUM, NUM) \
EM(TRACE_MIB_IP, IP) \
EM(TRACE_MIB_IPV6, IPV6) \
EM(TRACE_MIB_TCP, TCP) \
EM(TRACE_MIB_NET, NET) \
EM(TRACE_MIB_ICMP, ICMP) \
EM(TRACE_MIB_ICMPV6, ICMPV6) \
EM(TRACE_MIB_ICMPMSG, ICMPMSG) \
EM(TRACE_MIB_ICMPV6MSG, ICMPV6MSG) \
EM(TRACE_MIB_UDP, UDP) \
EM(TRACE_MIB_UDPV6, UDPV6) \
EM(TRACE_MIB_UDPLITE, UDPLITE) \
EM(TRACE_MIB_UDPV6LITE, UDPV6LITE) \
EM(TRACE_MIB_XFRM, XFRM) \
EM(TRACE_MIB_TLS, TLS) \
EMe(TRACE_MIB_MPTCP, MPTCP)
#define TRACE_UDP_MIB_VALUES \
EM(UDP_MIB_NUM, NUM) \
EM(UDP_MIB_INDATAGRAMS, INDATAGRAMS) \
EM(UDP_MIB_NOPORTS, NOPORTS) \
EM(UDP_MIB_INERRORS, INERRORS) \
EM(UDP_MIB_OUTDATAGRAMS, OUTDATAGRAMS) \
EM(UDP_MIB_RCVBUFERRORS, RCVBUFERRORS) \
EM(UDP_MIB_SNDBUFERRORS, SNDBUFERRORS) \
EM(UDP_MIB_CSUMERRORS, CSUMERRORS) \
EM(UDP_MIB_IGNOREDMULTI, IGNOREDMULTI) \
EMe(UDP_MIB_MEMERRORS, MEMERRORS)
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
TRACE_MIB_VALUES
TRACE_UDP_MIB_VALES
#undef EM
#undef EMe
#define EM(a,b) { a , #b },
#define EMe(a,b) { a , #b }
> +
> +DECLARE_EVENT_CLASS(snmp_template,
> +
> + TP_PROTO(struct sk_buff *skb, int type, int field, int val),
> +
> + TP_ARGS(skb, type, field, val),
> +
> + TP_STRUCT__entry(
> + __field(void *, skbaddr)
> + __field(int, type)
> + __field(int, field)
> + __field(int, val)
> + ),
> +
> + TP_fast_assign(
> + __entry->skbaddr = skb;
> + __entry->type = type;
> + __entry->field = field;
> + __entry->val = val;
> + ),
> +
> + TP_printk("skbaddr=%p, type=%d, field=%d, val=%d",
Then have here:
__enrty->skbaddr, __print_symbolic(__entry->type, TRACE_MIB_VALUES),
__print_symbolic(__entry->field, TRACE_UDP_MIB_VALUES),
__print_symbolic(__entry->val, { 0, "Decrease" }, { 1, "Increase" })
And then the output will have the proper English terms and not have to rely
on the user knowing what the numbers represent. Also, it allows them to
change in the future.
-- Steve
> + __entry->skbaddr, __entry->type,
> + __entry->field, __entry->val)
> +);
> +
> +DEFINE_EVENT(snmp_template, snmp,
> + TP_PROTO(struct sk_buff *skb, int type, int field, int val),
> + TP_ARGS(skb, type, field, val)
> +);
> +
> +#endif
> +
> +#include <trace/define_trace.h>
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 904909d020e2..b96077e09a58 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -347,4 +347,25 @@ enum
> __LINUX_MIB_TLSMAX
> };
>
> +/* mib type definitions for trace event */
> +enum {
> + TRACE_MIB_NUM = 0,
> + TRACE_MIB_IP,
> + TRACE_MIB_IPV6,
> + TRACE_MIB_TCP,
> + TRACE_MIB_NET,
> + TRACE_MIB_ICMP,
> + TRACE_MIB_ICMPV6,
> + TRACE_MIB_ICMPMSG,
> + TRACE_MIB_ICMPV6MSG,
> + TRACE_MIB_UDP,
> + TRACE_MIB_UDPV6,
> + TRACE_MIB_UDPLITE,
> + TRACE_MIB_UDPV6LITE,
> + TRACE_MIB_XFRM,
> + TRACE_MIB_TLS,
> + TRACE_MIB_MPTCP,
> + __TRACE_MIB_MAX
> +};
> +
> #endif /* _LINUX_SNMP_H */
> diff --git a/net/core/net-traces.c b/net/core/net-traces.c
> index c40cd8dd75c7..e291c0974438 100644
> --- a/net/core/net-traces.c
> +++ b/net/core/net-traces.c
> @@ -35,6 +35,7 @@
> #include <trace/events/tcp.h>
> #include <trace/events/fib.h>
> #include <trace/events/qdisc.h>
> +#include <trace/events/snmp.h>
> #if IS_ENABLED(CONFIG_BRIDGE)
> #include <trace/events/bridge.h>
> EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
> @@ -61,3 +62,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
>
> EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
> EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
> +
> +EXPORT_TRACEPOINT_SYMBOL_GPL(snmp);
Hello~
On Thu, Nov 18, 2021 at 11:36 PM David Ahern <[email protected]> wrote:
>
[...]
>
> there is already good infrastructure around kfree_skb - e.g., drop watch
> monitor. Why not extend that in a way that other drop points can benefit
> over time?
>
Thanks for your advice.
In fact, I don't think that this is a perfect idea. This way may have benefit
of reuse the existing kfree_skb event, but this will do plentiful modification
to the current code. For example, in tcp_v4_rcv(), you need to introduce the
new variate 'int free_reason' and record the drop reason in it, and pass
it to 'kfree_skb_with_reason()' in 'discard_it:'. Many places need this kind
modification. What's more, some statistics don't use 'kfree_skb()'.
However, with the tracepoint for snmp, we just need to pass 'skb' to
'UDP_INC_STATS()/TCP_INC_STATS()', the reason is already included.
This way, the modification is more simple and easier to maintain.
Thanks!
Menglong Dong
> e.g., something like this (uncompiled and not tested; and to which
> Steven is going to suggest strings for the reason):
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 0bd6520329f6..e66e634acad0 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -1075,8 +1075,13 @@ static inline bool skb_unref(struct sk_buff *skb)
> return true;
> }
>
> +enum skb_drop_reason {
> + SKB_DROP_REASON_NOT_SPECIFIED,
> + SKB_DROP_REASON_CSUM,
> +}
> void skb_release_head_state(struct sk_buff *skb);
> void kfree_skb(struct sk_buff *skb);
> +void kfree_skb_with_reason(struct sk_buff *skb, enum skb_drop_reason);
> void kfree_skb_list(struct sk_buff *segs);
> void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
> void skb_tx_error(struct sk_buff *skb);
> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
> index 9e92f22eb086..2a2d263f9d46 100644
> --- a/include/trace/events/skb.h
> +++ b/include/trace/events/skb.h
> @@ -14,7 +14,7 @@
> */
> TRACE_EVENT(kfree_skb,
>
> - TP_PROTO(struct sk_buff *skb, void *location),
> + TP_PROTO(struct sk_buff *skb, void *location, enum
> skb_drop_reason reason),
>
> TP_ARGS(skb, location),
>
> @@ -22,16 +22,18 @@ TRACE_EVENT(kfree_skb,
> __field( void *, skbaddr )
> __field( void *, location )
> __field( unsigned short, protocol )
> + __field( unsigned int, reason )
> ),
>
> TP_fast_assign(
> __entry->skbaddr = skb;
> __entry->location = location;
> __entry->protocol = ntohs(skb->protocol);
> + __entry->reason = reason;
> ),
>
> - TP_printk("skbaddr=%p protocol=%u location=%p",
> - __entry->skbaddr, __entry->protocol, __entry->location)
> + TP_printk("skbaddr=%p protocol=%u location=%p reason %u",
> + __entry->skbaddr, __entry->protocol, __entry->location,
> __entry->reason)
> );
>
> TRACE_EVENT(consume_skb,
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 67a9188d8a49..388059bda3d1 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -770,11 +770,29 @@ void kfree_skb(struct sk_buff *skb)
> if (!skb_unref(skb))
> return;
>
> - trace_kfree_skb(skb, __builtin_return_address(0));
> + trace_kfree_skb(skb, __builtin_return_address(0),
> SKB_DROP_REASON_NOT_SPECIFIED);
> __kfree_skb(skb);
> }
> EXPORT_SYMBOL(kfree_skb);
>
> +/**
> + * kfree_skb_with_reason - free an sk_buff
> + * @skb: buffer to free
> + * @reason: enum describing why the skb is dropped
> + *
> + * Drop a reference to the buffer and free it if the usage count has
> + * hit zero.
> + */
> +void kfree_skb_with_reason(struct sk_buff *skb, enum skb_drop_reason
> reason);
> +{
> + if (!skb_unref(skb))
> + return;
> +
> + trace_kfree_skb(skb, __builtin_return_address(0), reason);
> + __kfree_skb(skb);
> +}
> +EXPORT_SYMBOL(kfree_skb_with_reason);
> +
> void kfree_skb_list(struct sk_buff *segs)
> {
> while (segs) {
On 11/18/21 8:45 PM, Menglong Dong wrote:
> Hello~
>
> On Thu, Nov 18, 2021 at 11:36 PM David Ahern <[email protected]> wrote:
>>
> [...]
>>
>> there is already good infrastructure around kfree_skb - e.g., drop watch
>> monitor. Why not extend that in a way that other drop points can benefit
>> over time?
>>
>
> Thanks for your advice.
>
> In fact, I don't think that this is a perfect idea. This way may have benefit
> of reuse the existing kfree_skb event, but this will do plentiful modification
> to the current code. For example, in tcp_v4_rcv(), you need to introduce the
> new variate 'int free_reason' and record the drop reason in it, and pass
> it to 'kfree_skb_with_reason()' in 'discard_it:'. Many places need this kind
> modification. What's more, some statistics don't use 'kfree_skb()'.
>
> However, with the tracepoint for snmp, we just need to pass 'skb' to
> 'UDP_INC_STATS()/TCP_INC_STATS()', the reason is already included.
> This way, the modification is more simple and easier to maintain.
>
But it integrates into existing tooling which is a big win.
Ido gave the references for his work:
https://github.com/nhorman/dropwatch/pull/11
https://github.com/nhorman/dropwatch/commit/199440959a288dd97e3b7ae701d4e78968cddab7
And the Wireshark dissector is also upstream:
https://github.com/wireshark/wireshark/commit/a94a860c0644ec3b8a129fd243674a2e376ce1c8
i.e., the skb is already pushed to userspace for packet analysis. You
would just be augmenting more metadata along with it and not reinventing
all of this for just snmp counter based drops.
On Fri, Nov 19, 2021 at 11:54 AM David Ahern <[email protected]> wrote:
>
[...]
>
> But it integrates into existing tooling which is a big win.
>
> Ido gave the references for his work:
> https://github.com/nhorman/dropwatch/pull/11
> https://github.com/nhorman/dropwatch/commit/199440959a288dd97e3b7ae701d4e78968cddab7
>
I have been thinking about this all day, and I think your words make sense.
Indeed, this can make use of the frame of the 'drop monitor' module of kernel
and the userspace tools of wireshark, dropwatch, etc. And this idea is more
suitable for the aim of 'get the reason for packet drop'. However, the
shortcoming
of this idea is that it can't reuse the drop reason for the 'snmp'
frame.
With creating a tracepoint for 'snmp', it can make use of the 'snmp' frame and
the modifications can be easier. However, it's not friendly to the
users, such as
dropwatch, wireshark, etc. And it seems it is a little redundant with what
the tracepoint for 'kfree_sbk()' do. However, I think it's not
difficult to develop
a userspace tool. In fact, I have already write a tool based on BCC, which is
able to make use of 'snmp' tracepoint, such as:
$ sudo ./nettrace.py --tracer snmp -p udp --addr 192.168.122.8
begin tracing......
785487.366412: [snmp][udplite_noports]: UDP: 192.168.122.8:35310 ->
192.168.122.1:7979
And it can monitor packet drop of udp with ip 192.168.122.8 (filter by port,
statistics type are supported too).
And maybe we can integrate tracepoint of 'snmp' into 'drop monitor' with
NET_DM_ATTR_SNMP, just link NET_DM_ATTR_SW_DROPS and
NET_DM_ATTR_HW_DROPS?
@Steven What do you think? I think I'm ok with both ideas, as my main target
is to get the reason for the packet drop. As for the idea of
'kfree_skb_with_reason', I'm just a little worry about if we can accept the
modification it brings in.
Thanks!
Menglong Dong
> And the Wireshark dissector is also upstream:
> https://github.com/wireshark/wireshark/commit/a94a860c0644ec3b8a129fd243674a2e376ce1c8
>
> i.e., the skb is already pushed to userspace for packet analysis. You
> would just be augmenting more metadata along with it and not reinventing
> all of this for just snmp counter based drops.
On Sun, 21 Nov 2021 18:47:21 +0800
Menglong Dong <[email protected]> wrote:
> @Steven What do you think? I think I'm ok with both ideas, as my main target
> is to get the reason for the packet drop. As for the idea of
> 'kfree_skb_with_reason', I'm just a little worry about if we can accept the
> modification it brings in.
The use cases of trace events is really up to the subsystem
maintainers. I only make sure that the trace events are done properly.
So I'm not sure exactly what you are asking me.
-- Steve
On 11/21/21 3:47 AM, Menglong Dong wrote:
> On Fri, Nov 19, 2021 at 11:54 AM David Ahern <[email protected]> wrote:
>>
> [...]
>>
>> But it integrates into existing tooling which is a big win.
>>
>> Ido gave the references for his work:
>> https://github.com/nhorman/dropwatch/pull/11
>> https://github.com/nhorman/dropwatch/commit/199440959a288dd97e3b7ae701d4e78968cddab7
>>
>
> I have been thinking about this all day, and I think your words make sense.
> Indeed, this can make use of the frame of the 'drop monitor' module of kernel
> and the userspace tools of wireshark, dropwatch, etc. And this idea is more
> suitable for the aim of 'get the reason for packet drop'. However, the
> shortcoming
> of this idea is that it can't reuse the drop reason for the 'snmp'
> frame.
>
> With creating a tracepoint for 'snmp', it can make use of the 'snmp' frame and
> the modifications can be easier. However, it's not friendly to the
> users, such as
> dropwatch, wireshark, etc. And it seems it is a little redundant with what
> the tracepoint for 'kfree_sbk()' do. However, I think it's not
> difficult to develop
> a userspace tool. In fact, I have already write a tool based on BCC, which is
> able to make use of 'snmp' tracepoint, such as:
>
> $ sudo ./nettrace.py --tracer snmp -p udp --addr 192.168.122.8
> begin tracing......
> 785487.366412: [snmp][udplite_noports]: UDP: 192.168.122.8:35310 ->
> 192.168.122.1:7979
>
> And it can monitor packet drop of udp with ip 192.168.122.8 (filter by port,
> statistics type are supported too).
>
> And maybe we can integrate tracepoint of 'snmp' into 'drop monitor' with
> NET_DM_ATTR_SNMP, just link NET_DM_ATTR_SW_DROPS and
> NET_DM_ATTR_HW_DROPS?
>
you don't need to add 'snmp' to drop monitor; you only need to add
NET_DM_ATTR_ to the existing one.
This is the end of __udp4_lib_rcv:
__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
you want to add a tracepoint at both UDP_INC_STATS making 3 consecutive
lines that give access to the dropped skb with only slight variations in
metadata.
The last one, kfree_skb, gives you the address of the drop + the skb for
analysis. Just add the metadata to the existing drop monitor.
On Tue, Nov 23, 2021 at 12:42 AM David Ahern <[email protected]> wrote:
>
> On 11/21/21 3:47 AM, Menglong Dong wrote:
> > On Fri, Nov 19, 2021 at 11:54 AM David Ahern <[email protected]> wrote:
> >>
> > [...]
> >>
> >> But it integrates into existing tooling which is a big win.
> >>
> >> Ido gave the references for his work:
> >> https://github.com/nhorman/dropwatch/pull/11
> >> https://github.com/nhorman/dropwatch/commit/199440959a288dd97e3b7ae701d4e78968cddab7
> >>
> >
> > I have been thinking about this all day, and I think your words make sense.
> > Indeed, this can make use of the frame of the 'drop monitor' module of kernel
> > and the userspace tools of wireshark, dropwatch, etc. And this idea is more
> > suitable for the aim of 'get the reason for packet drop'. However, the
> > shortcoming
> > of this idea is that it can't reuse the drop reason for the 'snmp'
> > frame.
> >
> > With creating a tracepoint for 'snmp', it can make use of the 'snmp' frame and
> > the modifications can be easier. However, it's not friendly to the
> > users, such as
> > dropwatch, wireshark, etc. And it seems it is a little redundant with what
> > the tracepoint for 'kfree_sbk()' do. However, I think it's not
> > difficult to develop
> > a userspace tool. In fact, I have already write a tool based on BCC, which is
> > able to make use of 'snmp' tracepoint, such as:
> >
> > $ sudo ./nettrace.py --tracer snmp -p udp --addr 192.168.122.8
> > begin tracing......
> > 785487.366412: [snmp][udplite_noports]: UDP: 192.168.122.8:35310 ->
> > 192.168.122.1:7979
> >
> > And it can monitor packet drop of udp with ip 192.168.122.8 (filter by port,
> > statistics type are supported too).
> >
> > And maybe we can integrate tracepoint of 'snmp' into 'drop monitor' with
> > NET_DM_ATTR_SNMP, just link NET_DM_ATTR_SW_DROPS and
> > NET_DM_ATTR_HW_DROPS?
> >
>
> you don't need to add 'snmp' to drop monitor; you only need to add
> NET_DM_ATTR_ to the existing one.
>
> This is the end of __udp4_lib_rcv:
>
> __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
> drop:
> __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
> kfree_skb(skb);
>
> you want to add a tracepoint at both UDP_INC_STATS making 3 consecutive
> lines that give access to the dropped skb with only slight variations in
> metadata.
>
> The last one, kfree_skb, gives you the address of the drop + the skb for
> analysis. Just add the metadata to the existing drop monitor.
Ok, get it! Thanks~
Menglong Dong
>