2022-02-09 05:31:42

by Yajun Deng

[permalink] [raw]
Subject: [PATCH net-next] net: dev: introduce netdev_drop_inc()

We will use 'sudo perf record -g -a -e skb:kfree_skb' command to trace
the dropped packets when dropped increase in the output of ifconfig.
But there are two cases, one is only called kfree_skb(), another is
increasing the dropped and called kfree_skb(). The latter is what
we need. So we need to separate these two cases.

From the other side, the dropped packet came from the core network and
the driver, we also need to separate these two cases.

Add netdev_drop_inc() and add a tracepoint for the core network dropped
packets. use 'sudo perf record -g -a -e net:netdev_drop' and 'sudo perf
script' will recored the dropped packets by the core network.

Signed-off-by: Yajun Deng <[email protected]>
---
include/linux/netdevice.h | 7 +++++++
include/trace/events/net.h | 27 +++++++++++++++++++++++++++
net/core/dev.c | 30 ++++++++++++++++++++++++------
3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3fb6fb67ed77..f7e8b1e33076 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2285,6 +2285,13 @@ struct net_device {
};
#define to_net_dev(d) container_of(d, struct net_device, dev)

+enum netdev_drop {
+ NETDEV_RX_DROPPED,
+ NETDEV_TX_DROPPED,
+ NETDEV_RX_NOHANDLER,
+};
+void netdev_drop_inc(struct net_device *dev, enum netdev_drop drop);
+
static inline bool netif_elide_gro(const struct net_device *dev)
{
if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 78c448c6ab4c..0f8e9762a856 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -118,6 +118,33 @@ TRACE_EVENT(net_dev_xmit_timeout,
__get_str(name), __get_str(driver), __entry->queue_index)
);

+TRACE_EVENT(netdev_drop,
+
+ TP_PROTO(struct net_device *dev, void *location),
+
+ TP_ARGS(dev, location),
+
+ TP_STRUCT__entry(
+ __string(name, dev->name)
+ __field(void *, location)
+ __field(unsigned long, rx_dropped)
+ __field(unsigned long, tx_dropped)
+ __field(unsigned long, rx_nohandler)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, dev->name);
+ __entry->location = location;
+ __entry->rx_dropped = (unsigned long)atomic_long_read(&dev->rx_dropped);
+ __entry->tx_dropped = (unsigned long)atomic_long_read(&dev->tx_dropped);
+ __entry->rx_nohandler = (unsigned long)atomic_long_read(&dev->rx_nohandler);
+ ),
+
+ TP_printk("dev=%s rx_dropped=%lu tx_dropped=%lu rx_nohandler=%lu location=%p",
+ __get_str(name), __entry->rx_dropped, __entry->tx_dropped, __entry->rx_nohandler,
+ __entry->location)
+);
+
DECLARE_EVENT_CLASS(net_dev_template,

TP_PROTO(struct sk_buff *skb),
diff --git a/net/core/dev.c b/net/core/dev.c
index f662c6a7d7b4..213f9d1eaa8d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -392,6 +392,24 @@ static void unlist_netdevice(struct net_device *dev)
dev_base_seq_inc(dev_net(dev));
}

+void netdev_drop_inc(struct net_device *dev, enum netdev_drop drop)
+{
+ switch (drop) {
+ case NETDEV_RX_DROPPED:
+ atomic_long_inc(&dev->rx_dropped);
+ break;
+ case NETDEV_TX_DROPPED:
+ atomic_long_inc(&dev->tx_dropped);
+ break;
+ case NETDEV_RX_NOHANDLER:
+ atomic_long_inc(&dev->rx_nohandler);
+ break;
+ default:
+ break;
+ }
+ trace_netdev_drop(dev, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(netdev_drop_inc);
/*
* Our notifier list
*/
@@ -3586,7 +3604,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
out_kfree_skb:
kfree_skb(skb);
out_null:
- atomic_long_inc(&dev->tx_dropped);
+ netdev_drop_inc(dev, NETDEV_TX_DROPPED);
return NULL;
}

@@ -4136,7 +4154,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
rc = -ENETDOWN;
rcu_read_unlock_bh();

- atomic_long_inc(&dev->tx_dropped);
+ netdev_drop_inc(dev, NETDEV_TX_DROPPED);
kfree_skb_list(skb);
return rc;
out:
@@ -4188,7 +4206,7 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_enable();
return ret;
drop:
- atomic_long_inc(&dev->tx_dropped);
+ netdev_drop_inc(dev, NETDEV_TX_DROPPED);
kfree_skb_list(skb);
return NET_XMIT_DROP;
}
@@ -4557,7 +4575,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,

local_irq_restore(flags);

- atomic_long_inc(&skb->dev->rx_dropped);
+ netdev_drop_inc(skb->dev, NETDEV_RX_DROPPED);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -5319,9 +5337,9 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
} else {
drop:
if (!deliver_exact)
- atomic_long_inc(&skb->dev->rx_dropped);
+ netdev_drop_inc(skb->dev, NETDEV_RX_DROPPED);
else
- atomic_long_inc(&skb->dev->rx_nohandler);
+ netdev_drop_inc(skb->dev, NETDEV_RX_NOHANDLER);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
--
2.25.1



2022-02-09 07:01:29

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next] net: dev: introduce netdev_drop_inc()

On Wed, 09 Feb 2022 02:20:07 +0000 [email protected] wrote:
> February 9, 2022 8:27 AM, "Jakub Kicinski" <[email protected]> wrote:
>
> > On Tue, 8 Feb 2022 14:43:18 +0800 Yajun Deng wrote:
> >
> >> We will use 'sudo perf record -g -a -e skb:kfree_skb' command to trace
> >> the dropped packets when dropped increase in the output of ifconfig.
> >> But there are two cases, one is only called kfree_skb(), another is
> >> increasing the dropped and called kfree_skb(). The latter is what
> >> we need. So we need to separate these two cases.
> >>
> >> From the other side, the dropped packet came from the core network and
> >> the driver, we also need to separate these two cases.
> >>
> >> Add netdev_drop_inc() and add a tracepoint for the core network dropped
> >> packets. use 'sudo perf record -g -a -e net:netdev_drop' and 'sudo perf
> >> script' will recored the dropped packets by the core network.
> >>
> >> Signed-off-by: Yajun Deng <[email protected]>
> >
> > Have you seen the work that's being done around kfree_skb_reason()?
>
> Yes, I saw it. The focus of kfree_skb_reason() is trace kfree_skb() and the reason,
> but the focus of this patch only traces this case of the dropped packet.
>
> I don't want to trace all kfree_skb(), but I just want to trace the dropped packet.
>
> This command 'sudo perf record -g -a -e skb:kfree_skb' would trace all kfree_skb(),
> kfree_skb() would drowned out the case of dropped packets when the samples were too large.

IIRC perf support filters, I think with -f? We can't add a tracepoint
for every combination of attributes.

2022-02-09 09:30:28

by Yajun Deng

[permalink] [raw]
Subject: Re: [PATCH net-next] net: dev: introduce netdev_drop_inc()

February 9, 2022 8:27 AM, "Jakub Kicinski" <[email protected]> wrote:

> On Tue, 8 Feb 2022 14:43:18 +0800 Yajun Deng wrote:
>
>> We will use 'sudo perf record -g -a -e skb:kfree_skb' command to trace
>> the dropped packets when dropped increase in the output of ifconfig.
>> But there are two cases, one is only called kfree_skb(), another is
>> increasing the dropped and called kfree_skb(). The latter is what
>> we need. So we need to separate these two cases.
>>
>> From the other side, the dropped packet came from the core network and
>> the driver, we also need to separate these two cases.
>>
>> Add netdev_drop_inc() and add a tracepoint for the core network dropped
>> packets. use 'sudo perf record -g -a -e net:netdev_drop' and 'sudo perf
>> script' will recored the dropped packets by the core network.
>>
>> Signed-off-by: Yajun Deng <[email protected]>
>
> Have you seen the work that's being done around kfree_skb_reason()?

Yes, I saw it. The focus of kfree_skb_reason() is trace kfree_skb() and the reason,
but the focus of this patch only traces this case of the dropped packet.

I don't want to trace all kfree_skb(), but I just want to trace the dropped packet.

This command 'sudo perf record -g -a -e skb:kfree_skb' would trace all kfree_skb(),
kfree_skb() would drowned out the case of dropped packets when the samples were too large.

2022-02-09 11:58:58

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next] net: dev: introduce netdev_drop_inc()

On Tue, 8 Feb 2022 14:43:18 +0800 Yajun Deng wrote:
> We will use 'sudo perf record -g -a -e skb:kfree_skb' command to trace
> the dropped packets when dropped increase in the output of ifconfig.
> But there are two cases, one is only called kfree_skb(), another is
> increasing the dropped and called kfree_skb(). The latter is what
> we need. So we need to separate these two cases.
>
> From the other side, the dropped packet came from the core network and
> the driver, we also need to separate these two cases.
>
> Add netdev_drop_inc() and add a tracepoint for the core network dropped
> packets. use 'sudo perf record -g -a -e net:netdev_drop' and 'sudo perf
> script' will recored the dropped packets by the core network.
>
> Signed-off-by: Yajun Deng <[email protected]>

Have you seen the work that's being done around kfree_skb_reason()?

2022-02-09 12:47:29

by Yajun Deng

[permalink] [raw]
Subject: Re: [PATCH net-next] net: dev: introduce netdev_drop_inc()

February 9, 2022 11:53 AM, "Jakub Kicinski" <[email protected]> wrote:

> On Wed, 09 Feb 2022 02:20:07 +0000 [email protected] wrote:
>
>> February 9, 2022 8:27 AM, "Jakub Kicinski" <[email protected]> wrote:
>>
>> On Tue, 8 Feb 2022 14:43:18 +0800 Yajun Deng wrote:
>>
>> We will use 'sudo perf record -g -a -e skb:kfree_skb' command to trace
>> the dropped packets when dropped increase in the output of ifconfig.
>> But there are two cases, one is only called kfree_skb(), another is
>> increasing the dropped and called kfree_skb(). The latter is what
>> we need. So we need to separate these two cases.
>>
>> From the other side, the dropped packet came from the core network and
>> the driver, we also need to separate these two cases.
>>
>> Add netdev_drop_inc() and add a tracepoint for the core network dropped
>> packets. use 'sudo perf record -g -a -e net:netdev_drop' and 'sudo perf
>> script' will recored the dropped packets by the core network.
>>
>> Signed-off-by: Yajun Deng <[email protected]>
>>
>> Have you seen the work that's being done around kfree_skb_reason()?
>>
>> Yes, I saw it. The focus of kfree_skb_reason() is trace kfree_skb() and the reason,
>> but the focus of this patch only traces this case of the dropped packet.
>>
>> I don't want to trace all kfree_skb(), but I just want to trace the dropped packet.
>>
>> This command 'sudo perf record -g -a -e skb:kfree_skb' would trace all kfree_skb(),
>> kfree_skb() would drowned out the case of dropped packets when the samples were too large.
>
> IIRC perf support filters, I think with -f? We can't add a tracepoint
> for every combination of attributes.

Yes, we can use a command like this: " sudo perf record -g -a -e skb:kfree_skb --filter 'protocol == 0x0800' ",
However, only the filter is defined in kfree_skb tracepoint are available.

The purpose of this patch is record {rx_dropped, tx_dropped, rx_nohandler} in struct net_device, to distinguish
with struct net_device_stats.

We don't have any tracepoint records {rx_dropped, tx_dropped, rx_nohandler} in struct net_device now.
Can we add {rx_dropped, tx_dropped, rx_nohandler} in kfree_skb tracepoint? like this:

TP_STRUCT__entry(
__field(void *, skbaddr)
__field(void *, location)
__field(unsigned short, protocol)
__field(enum skb_drop_reason, reason)
__field(unsigned long, rx_dropped)
__field(unsigned long, tx_dropped)
__field(unsigned long, rx_nohandler)

),

TP_fast_assign(
__entry->skbaddr = skb;
__entry->location = location;
__entry->protocol = ntohs(skb->protocol);
__entry->reason = reason;
__entry->rx_dropped = (unsigned long)atomic_long_read(&skb->dev->rx_dropped);
__entry->tx_dropped = (unsigned long)atomic_long_read(&skb->dev->tx_dropped);
__entry->rx_nohandler = (unsigned long)atomic_long_read(&skb->dev->rx_nohandler);
),

If so, we can record this but not add a tracepoint.

2022-02-09 16:46:58

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH net-next] net: dev: introduce netdev_drop_inc()

On Wed, 09 Feb 2022 07:27:44 +0000 [email protected] wrote:
> February 9, 2022 11:53 AM, "Jakub Kicinski" <[email protected]> wrote:
> > IIRC perf support filters, I think with -f? We can't add a tracepoint
> > for every combination of attributes.
>
> Yes, we can use a command like this: " sudo perf record -g -a -e skb:kfree_skb --filter 'protocol == 0x0800' ",
> However, only the filter is defined in kfree_skb tracepoint are available.
>
> The purpose of this patch is record {rx_dropped, tx_dropped, rx_nohandler} in struct net_device, to distinguish
> with struct net_device_stats.
>
> We don't have any tracepoint records {rx_dropped, tx_dropped, rx_nohandler} in struct net_device now.
> Can we add {rx_dropped, tx_dropped, rx_nohandler} in kfree_skb tracepoint? like this:
>
> TP_STRUCT__entry(
> __field(void *, skbaddr)
> __field(void *, location)
> __field(unsigned short, protocol)
> __field(enum skb_drop_reason, reason)
> __field(unsigned long, rx_dropped)
> __field(unsigned long, tx_dropped)
> __field(unsigned long, rx_nohandler)
>
> ),
>
> TP_fast_assign(
> __entry->skbaddr = skb;
> __entry->location = location;
> __entry->protocol = ntohs(skb->protocol);
> __entry->reason = reason;
> __entry->rx_dropped = (unsigned long)atomic_long_read(&skb->dev->rx_dropped);
> __entry->tx_dropped = (unsigned long)atomic_long_read(&skb->dev->tx_dropped);
> __entry->rx_nohandler = (unsigned long)atomic_long_read(&skb->dev->rx_nohandler);
> ),
>
> If so, we can record this but not add a tracepoint.

You can use bpftrace to dereference extra information.