2021-10-15 20:00:09

by Mark Pashmfouroush

[permalink] [raw]
Subject: [PATCH bpf-next 1/2] bpf: Add ifindex to bpf_sk_lookup

It may be helpful to have access to the ifindex during bpf socket
lookup. Add this to the bpf_sk_lookup API.

Signed-off-by: Mark Pashmfouroush <[email protected]>

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 47f80adbe744..54ffd8036be6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1370,6 +1370,7 @@ struct bpf_sk_lookup_kern {
const struct in6_addr *daddr;
} v6;
struct sock *selected_sk;
+ u32 ifindex;
bool no_reuseport;
};

@@ -1432,7 +1433,7 @@ extern struct static_key_false bpf_sk_lookup_enabled;
static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 dport,
- struct sock **psk)
+ const int ifindex, struct sock **psk)
{
struct bpf_prog_array *run_array;
struct sock *selected_sk = NULL;
@@ -1448,6 +1449,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
.v4.daddr = daddr,
.sport = sport,
.dport = dport,
+ .ifindex = ifindex,
};
u32 act;

@@ -1470,7 +1472,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
const __be16 sport,
const struct in6_addr *daddr,
const u16 dport,
- struct sock **psk)
+ const int ifindex, struct sock **psk)
{
struct bpf_prog_array *run_array;
struct sock *selected_sk = NULL;
@@ -1486,6 +1488,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
.v6.daddr = daddr,
.sport = sport,
.dport = dport,
+ .ifindex = ifindex,
};
u32 act;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..9bd3e8b8a659 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
+ __u32 ifindex; /* Maps to skb->dev->ifindex */
};

/*
diff --git a/net/core/filter.c b/net/core/filter.c
index 4bace37a6a44..9514c6bbd117 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10491,6 +10491,7 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));

@@ -10580,6 +10581,12 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct bpf_sk_lookup_kern,
dport, 2, target_size));
break;
+
+ case offsetof(struct bpf_sk_lookup, ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ ifindex, 4, target_size));
+ break;
}

return insn - insn_buf;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..088bb6c27114 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -305,7 +305,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum)
+ __be32 daddr, u16 hnum, const int dif)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
@@ -313,8 +313,8 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net,
if (hashinfo != &tcp_hashinfo)
return NULL; /* only TCP is supported */

- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
- saddr, sport, daddr, hnum, &sk);
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+ daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;

@@ -338,7 +338,7 @@ struct sock *__inet_lookup_listener(struct net *net,
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum);
+ saddr, sport, daddr, hnum, dif);
if (result)
goto done;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a7825a5b842..f4ddfa38449e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -459,7 +459,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
struct udp_table *udptable,
struct sk_buff *skb,
__be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum)
+ __be32 daddr, u16 hnum, const int dif)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
@@ -467,8 +467,8 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
if (udptable != &udp_table)
return NULL; /* only UDP is supported */

- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
- saddr, sport, daddr, hnum, &sk);
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
+ daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;

@@ -504,7 +504,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
sk = udp4_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum);
+ saddr, sport, daddr, hnum, dif);
if (sk) {
result = sk;
goto done;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 55c290d55605..8d25cb5d124b 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -165,7 +165,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
- const u16 hnum)
+ const u16 hnum, const int dif)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
@@ -173,8 +173,8 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
if (hashinfo != &tcp_hashinfo)
return NULL; /* only TCP is supported */

- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
- saddr, sport, daddr, hnum, &sk);
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+ daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;

@@ -198,7 +198,7 @@ struct sock *inet6_lookup_listener(struct net *net,
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum);
+ saddr, sport, daddr, hnum, dif);
if (result)
goto done;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e505bb007e9f..77ba0917b3ea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -194,7 +194,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
const struct in6_addr *saddr,
__be16 sport,
const struct in6_addr *daddr,
- u16 hnum)
+ u16 hnum, const int dif)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
@@ -202,8 +202,8 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
if (udptable != &udp_table)
return NULL; /* only UDP is supported */

- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
- saddr, sport, daddr, hnum, &sk);
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
+ daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;

@@ -239,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
/* Lookup redirect from BPF */
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
sk = udp6_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum);
+ saddr, sport, daddr, hnum, dif);
if (sk) {
result = sk;
goto done;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fc59d61937a..9bd3e8b8a659 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
+ __u32 ifindex; /* Maps to skb->dev->ifindex */
};

/*
--
2.31.1


2021-10-21 01:41:36

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add ifindex to bpf_sk_lookup

On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush
<[email protected]> wrote:
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6fc59d61937a..9bd3e8b8a659 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
> __u32 local_ip4; /* Network byte order */
> __u32 local_ip6[4]; /* Network byte order */
> __u32 local_port; /* Host byte order */
> + __u32 ifindex; /* Maps to skb->dev->ifindex */

Is the comment accurate?
The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb).
Which is skb->skb_iif at this point (I think).
skb->dev->ifindex would typically mean destination or egress ifindex.
In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them.
If it's really dev->ifindex then keeping the 'ifindex' name here would be correct,
but looking at how it's populated in inet/udp_lookup makes me wonder
whether it should be named 'ingress_ifindex' instead and comment clarified.

If/when you resubmit please trim cc list to a minimum.

2021-10-21 19:02:50

by John Fastabend

[permalink] [raw]
Subject: RE: [PATCH bpf-next 1/2] bpf: Add ifindex to bpf_sk_lookup

Mark Pashmfouroush wrote:
> It may be helpful to have access to the ifindex during bpf socket
> lookup. Add this to the bpf_sk_lookup API.
>
> Signed-off-by: Mark Pashmfouroush <[email protected]>
>

Would be nice to have more details on the 'use case' here. I
don't know off-hand how it 'may be helpful'.

For the actual code though LGTM.

Acked-by: John Fastabend <[email protected]>

2021-10-21 19:09:30

by John Fastabend

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add ifindex to bpf_sk_lookup

Alexei Starovoitov wrote:
> On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush
> <[email protected]> wrote:
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 6fc59d61937a..9bd3e8b8a659 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
> > __u32 local_ip4; /* Network byte order */
> > __u32 local_ip6[4]; /* Network byte order */
> > __u32 local_port; /* Host byte order */
> > + __u32 ifindex; /* Maps to skb->dev->ifindex */
>
> Is the comment accurate?
> The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb).
> Which is skb->skb_iif at this point (I think).
> skb->dev->ifindex would typically mean destination or egress ifindex.
> In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them.
> > If it's really dev->ifindex then keeping the 'ifindex' name here would be correct,
> but looking at how it's populated in inet/udp_lookup makes me wonder
> whether it should be named 'ingress_ifindex' instead and comment clarified.
>
> If/when you resubmit please trim cc list to a minimum.

At least in the TCP cases it's coming from inet_iif, which is either
the rtable or skb->skb_iif. Agreed, it would be nice to fix up the comment.

Thanks.