From: Menglong Dong <[email protected]>
Replace kfree_skb() with kfree_skb_reason() in ip_rcv_finish_core(). The
following drop reasons are introduced:
SKB_DROP_REASON_IP_ROUTE_INPUT
SKB_DROP_REASON_IP_RPFILTER
SKB_DROP_REASON_EARLY_DEMUX
SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST
Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 4 ++++
include/trace/events/skb.h | 5 +++++
net/ipv4/ip_input.c | 22 ++++++++++++++++------
3 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f3028028b83e..8942d32c0657 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -324,6 +324,10 @@ enum skb_drop_reason {
SKB_DROP_REASON_OTHERHOST,
SKB_DROP_REASON_IP_CSUM,
SKB_DROP_REASON_IP_INHDR,
+ SKB_DROP_REASON_IP_ROUTE_INPUT,
+ SKB_DROP_REASON_IP_RPFILTER,
+ SKB_DROP_REASON_EARLY_DEMUX,
+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
SKB_DROP_REASON_MAX,
};
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index d1b0d9690e62..1dcdcc92cf08 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -20,6 +20,11 @@
EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \
EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \
EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \
+ EM(SKB_DROP_REASON_IP_ROUTE_INPUT, IP_ROUTE_INPUT) \
+ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \
+ EM(SKB_DROP_REASON_EARLY_DEMUX, EARLY_DEMUX) \
+ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \
+ UNICAST_IN_L2_MULTICAST) \
EMe(SKB_DROP_REASON_MAX, MAX)
#undef EM
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ab9bee4bbf0a..77bb9ddc441b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
+ int err, drop_reason;
struct rtable *rt;
- int err;
+
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (ip_can_use_hint(skb, iph, hint)) {
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
@@ -339,8 +341,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
udp_v4_early_demux, skb);
- if (unlikely(err))
+ if (unlikely(err)) {
+ drop_reason = SKB_DROP_REASON_EARLY_DEMUX;
goto drop_error;
+ }
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
@@ -353,8 +357,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
if (!skb_valid_dst(skb)) {
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, dev);
- if (unlikely(err))
+ if (unlikely(err)) {
+ drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;
goto drop_error;
+ }
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -396,19 +402,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* so-called "hole-196" attack) so do it for both.
*/
if (in_dev &&
- IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
+ drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
goto drop;
+ }
}
return NET_RX_SUCCESS;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV)
+ if (err == -EXDEV) {
+ drop_reason = SKB_DROP_REASON_IP_RPFILTER;
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ }
goto drop;
}
--
2.34.1
On 1/24/22 6:15 AM, [email protected] wrote:
> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> index ab9bee4bbf0a..77bb9ddc441b 100644
> --- a/net/ipv4/ip_input.c
> +++ b/net/ipv4/ip_input.c
> @@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> {
> const struct iphdr *iph = ip_hdr(skb);
> int (*edemux)(struct sk_buff *skb);
> + int err, drop_reason;
> struct rtable *rt;
> - int err;
> +
> + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
>
> if (ip_can_use_hint(skb, iph, hint)) {
> err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
> @@ -339,8 +341,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
> err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
> udp_v4_early_demux, skb);
> - if (unlikely(err))
> + if (unlikely(err)) {
> + drop_reason = SKB_DROP_REASON_EARLY_DEMUX;
is there really value in this one? You ignore the error case from
ip_route_use_hint which is a similar, highly unlikely error path so why
care about this one? The only failure case is ip_mc_validate_source from
udp_v4_early_demux and 'early demux' drops really mean nothing to the user.
> goto drop_error;
> + }
> /* must reload iph, skb->head might have changed */
> iph = ip_hdr(skb);
> }
> @@ -353,8 +357,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> if (!skb_valid_dst(skb)) {
> err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> iph->tos, dev);
> - if (unlikely(err))
> + if (unlikely(err)) {
> + drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;
The reason codes should be meaningful to users and not derived from a
code path. What does SKB_DROP_REASON_IP_ROUTE_INPUT mean as a failure?
> goto drop_error;
> + }
> }
>
> #ifdef CONFIG_IP_ROUTE_CLASSID
On Wed, Jan 26, 2022 at 10:18 AM David Ahern <[email protected]> wrote:
>
> On 1/24/22 6:15 AM, [email protected] wrote:
> > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> > index ab9bee4bbf0a..77bb9ddc441b 100644
> > --- a/net/ipv4/ip_input.c
> > +++ b/net/ipv4/ip_input.c
> > @@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> > {
> > const struct iphdr *iph = ip_hdr(skb);
> > int (*edemux)(struct sk_buff *skb);
> > + int err, drop_reason;
> > struct rtable *rt;
> > - int err;
> > +
> > + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
> >
> > if (ip_can_use_hint(skb, iph, hint)) {
> > err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
> > @@ -339,8 +341,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> > if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
> > err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
> > udp_v4_early_demux, skb);
> > - if (unlikely(err))
> > + if (unlikely(err)) {
> > + drop_reason = SKB_DROP_REASON_EARLY_DEMUX;
>
> is there really value in this one? You ignore the error case from
> ip_route_use_hint which is a similar, highly unlikely error path so why
> care about this one? The only failure case is ip_mc_validate_source from
> udp_v4_early_demux and 'early demux' drops really mean nothing to the user.
>
Ok, let's just ignore it. (In fact, it's because I don't know what
'early demux' does :/ )
>
> > goto drop_error;
> > + }
> > /* must reload iph, skb->head might have changed */
> > iph = ip_hdr(skb);
> > }
> > @@ -353,8 +357,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> > if (!skb_valid_dst(skb)) {
> > err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> > iph->tos, dev);
> > - if (unlikely(err))
> > + if (unlikely(err)) {
> > + drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;
>
> The reason codes should be meaningful to users and not derived from a
> code path. What does SKB_DROP_REASON_IP_ROUTE_INPUT mean as a failure?
>
Isn't it meaningful? I named it after the meaning 'ip route lookup or validation
failed in the input path'; can't it express this information?
>
> > goto drop_error;
> > + }
> > }
> >
> > #ifdef CONFIG_IP_ROUTE_CLASSID
On 1/25/22 7:36 PM, Menglong Dong wrote:
> Isn't it meaningful? I name it from the meaning of 'ip route lookup or validate
> failed in input path', can't it express this information?
ip_route_input_noref has many failures and not all of them are FIB
lookups. ip_route_input_slow has a bunch of EINVAL cases for example.
Returning a 'reason' as the code function name has no meaning to a user
and could actually be misleading in some cases. I would skip this one
for now.
On Wed, Jan 26, 2022 at 10:57 AM David Ahern <[email protected]> wrote:
>
> On 1/25/22 7:36 PM, Menglong Dong wrote:
> > Isn't it meaningful? I name it from the meaning of 'ip route lookup or validate
> > failed in input path', can't it express this information?
>
>
> ip_route_input_noref has many failures and not all of them are FIB
> lookups. ip_route_input_slow has a bunch of EINVAL cases for example.
>
> Returning a 'reason' as the code function name has no meaning to a user
> and could actually be misleading in some cases. I would skip this one
> for now.
Yeah, the real reason can be complex. I'll skip this case for now.