From: Menglong Dong <[email protected]>
The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in
__inet_bind() is not handled properly. While the return value
is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and
exit:
exit:
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
goto out_release_sock;
}
Let's take UDP for example and see what will happen. For UDP
socket, it will be added to 'udp_prot.h.udp_table->hash' and
'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port()
called success. If 'inet->inet_rcv_saddr' is specified here,
then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong
to (because inet_saddr is changed to 0), and UDP packet received
will not be passed to this sock. If 'inet->inet_rcv_saddr' is not
specified here, the sock will work fine, as it can receive packet
properly, which is wired, as the 'bind()' is already failed.
To undo the get_port() operation, introduce the 'put_port' field
for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP
proto, it is udp_lib_unhash(); For icmp proto, it is
ping_unhash().
Therefore, after sys_bind() fail caused by
BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which
means that it can try to be binded to another port.
The second patch use C99 initializers in test_sock.c
The third patch is the selftests for this modification.
Changes since v4:
- use C99 initializers in test_sock.c before adding the test case
Changes since v3:
- add the third patch which use C99 initializers in test_sock.c
Changes since v2:
- NULL check for sk->sk_prot->put_port
Changes since v1:
- introduce 'put_port' field for 'struct proto'
- add selftests for it
Menglong Dong (3):
net: bpf: handle return value of
BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND()
bpf: selftests: use C99 initializers in test_sock.c
bpf: selftests: add bind retry for post_bind{4, 6}
include/net/sock.h | 1 +
net/ipv4/af_inet.c | 2 +
net/ipv4/ping.c | 1 +
net/ipv4/tcp_ipv4.c | 1 +
net/ipv4/udp.c | 1 +
net/ipv6/af_inet6.c | 2 +
net/ipv6/ping.c | 1 +
net/ipv6/tcp_ipv6.c | 1 +
net/ipv6/udp.c | 1 +
tools/testing/selftests/bpf/test_sock.c | 370 ++++++++++++++----------
10 files changed, 233 insertions(+), 148 deletions(-)
--
2.27.0
From: Menglong Dong <[email protected]>
The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in
__inet_bind() is not handled properly. While the return value
is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and
exit:
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
goto out_release_sock;
}
Let's take UDP for example and see what will happen. For UDP
socket, it will be added to 'udp_prot.h.udp_table->hash' and
'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port()
called success. If 'inet->inet_rcv_saddr' is specified here,
then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong
to (because inet_saddr is changed to 0), and UDP packet received
will not be passed to this sock. If 'inet->inet_rcv_saddr' is not
specified here, the sock will work fine, as it can receive packet
properly, which is wired, as the 'bind()' is already failed.
To undo the get_port() operation, introduce the 'put_port' field
for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP
proto, it is udp_lib_unhash(); For icmp proto, it is
ping_unhash().
Therefore, after sys_bind() fail caused by
BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which
means that it can try to be binded to another port.
Signed-off-by: Menglong Dong <[email protected]>
---
v3:
- NULL check for sk->sk_prot->put_port
v2:
- introduce 'put_port' field for 'struct proto'
---
include/net/sock.h | 1 +
net/ipv4/af_inet.c | 2 ++
net/ipv4/ping.c | 1 +
net/ipv4/tcp_ipv4.c | 1 +
net/ipv4/udp.c | 1 +
net/ipv6/af_inet6.c | 2 ++
net/ipv6/ping.c | 1 +
net/ipv6/tcp_ipv6.c | 1 +
net/ipv6/udp.c | 1 +
9 files changed, 11 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 7b4b4237e6e0..ff9b508d9c5f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1209,6 +1209,7 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+ void (*put_port)(struct sock *sk);
#ifdef CONFIG_BPF_SYSCALL
int (*psock_update_sk_prot)(struct sock *sk,
struct sk_psock *psock,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f53184767ee7..9c465bac1eb0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out_release_sock;
}
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e540b0dcf085..0e56df3a45e2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -994,6 +994,7 @@ struct proto ping_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac10e4cdd8d0..9861786b8336 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3076,6 +3076,7 @@ struct proto tcp_prot = {
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7b18a6f42f18..c2a4411d2b04 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2927,6 +2927,7 @@ struct proto udp_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d1636425654e..8fe7900f1949 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
+ if (sk->sk_prot->put_port)
+ sk->sk_prot->put_port(sk);
goto out;
}
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 6ac88fe24a8e..9256f6ba87ef 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -177,6 +177,7 @@ struct proto pingv6_prot = {
.hash = ping_hash,
.unhash = ping_unhash,
.get_port = ping_get_port,
+ .put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ac243d18c2b..075ee8a2df3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = {
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = tcp_bpf_update_proto,
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1accc06abc54..90718a924ca8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1732,6 +1732,7 @@ struct proto udpv6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+ .put_port = udp_lib_unhash,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
--
2.30.2
From: Menglong Dong <[email protected]>
Use C99 initializers for the initialization of 'tests' in test_sock.c.
Signed-off-by: Menglong Dong <[email protected]>
---
tools/testing/selftests/bpf/test_sock.c | 220 ++++++++++--------------
1 file changed, 92 insertions(+), 128 deletions(-)
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index e8edd3dd3ec2..94f9b126f5ed 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -46,7 +46,7 @@ struct sock_test {
static struct sock_test tests[] = {
{
- "bind4 load with invalid access: src_ip6",
+ .descr = "bind4 load with invalid access: src_ip6",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -54,16 +54,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind4 load with invalid access: mark",
+ .descr = "bind4 load with invalid access: mark",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -71,16 +67,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind6 load with invalid access: src_ip4",
+ .descr = "bind6 load with invalid access: src_ip4",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -88,16 +80,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "sock_create load with invalid access: src_port",
+ .descr = "sock_create load with invalid access: src_port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -105,128 +93,106 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = LOAD_REJECT,
},
{
- "sock_create load w/o expected_attach_type (compat mode)",
+ .descr = "sock_create load w/o expected_attach_type (compat mode)",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "sock_create load w/ expected_attach_type",
+ .descr = "sock_create load w/ expected_attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "attach type mismatch bind4 vs bind6",
+ .descr = "attach type mismatch bind4 vs bind6",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs bind4",
+ .descr = "attach type mismatch bind6 vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch default vs bind4",
+ .descr = "attach type mismatch default vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs sock_create",
+ .descr = "attach type mismatch bind6 vs sock_create",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = ATTACH_REJECT,
},
{
- "bind4 reject all",
+ .descr = "bind4 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = BIND_REJECT,
},
{
- "bind6 reject all",
+ .descr = "bind6 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = BIND_REJECT,
},
{
- "bind6 deny specific IP & port",
+ .descr = "bind6 deny specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -247,16 +213,16 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::1",
- 8193,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .result = BIND_REJECT,
},
{
- "bind4 allow specific IP & port",
+ .descr = "bind4 allow specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -277,41 +243,39 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 4098,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .result = SUCCESS,
},
{
- "bind4 allow all",
+ .descr = "bind4 allow all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = SUCCESS,
},
{
- "bind6 allow all",
+ .descr = "bind6 allow all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = SUCCESS,
},
};
--
2.30.2
From: Menglong Dong <[email protected]>
With previous patch, kernel is able to 'put_port' after sys_bind()
fails. Add the test for that case: rebind another port after
sys_bind() fails. If the bind success, it means previous bind
operation is already undoed.
Signed-off-by: Menglong Dong <[email protected]>
---
tools/testing/selftests/bpf/test_sock.c | 150 ++++++++++++++++++++----
1 file changed, 130 insertions(+), 20 deletions(-)
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 94f9b126f5ed..fe10f8134278 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -35,12 +35,15 @@ struct sock_test {
/* Endpoint to bind() to */
const char *ip;
unsigned short port;
+ unsigned short port_retry;
/* Expected test result */
enum {
LOAD_REJECT,
ATTACH_REJECT,
BIND_REJECT,
SUCCESS,
+ RETRY_SUCCESS,
+ RETRY_REJECT
} result;
};
@@ -251,6 +254,99 @@ static struct sock_test tests[] = {
.port = 4098,
.result = SUCCESS,
},
+ {
+ .descr = "bind4 deny specific IP & port of TCP, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 deny specific IP & port of UDP, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind6 deny specific IP & port, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .port_retry = 9000,
+ .result = RETRY_SUCCESS,
+ },
{
.descr = "bind4 allow all",
.insns = {
@@ -315,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd,
return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+static int bind_sock(int domain, int type, const char *ip,
+ unsigned short port, unsigned short port_retry)
{
struct sockaddr_storage addr;
struct sockaddr_in6 *addr6;
struct sockaddr_in *addr4;
int sockfd = -1;
socklen_t len;
- int err = 0;
+ int res = SUCCESS;
sockfd = socket(domain, type, 0);
if (sockfd < 0)
@@ -348,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
goto err;
}
- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
- goto err;
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (errno != EPERM)
+ goto err;
+ if (port_retry)
+ goto retry;
+ res = BIND_REJECT;
+ goto out;
+ }
+ goto out;
+retry:
+ if (domain == AF_INET)
+ addr4->sin_port = htons(port_retry);
+ else
+ addr6->sin6_port = htons(port_retry);
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ if (errno != EPERM)
+ goto err;
+ res = RETRY_REJECT;
+ } else {
+ res = RETRY_SUCCESS;
+ }
goto out;
err:
- err = -1;
+ res = -1;
out:
close(sockfd);
- return err;
+ return res;
}
static int run_test_case(int cgfd, const struct sock_test *test)
{
int progfd = -1;
int err = 0;
+ int res;
printf("Test case: %s .. ", test->descr);
progfd = load_sock_prog(test->insns, test->expected_attach_type);
@@ -380,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test)
goto err;
}
- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
- /* sys_bind() may fail for different reasons, errno has to be
- * checked to confirm that BPF program rejected it.
- */
- if (test->result == BIND_REJECT && errno == EPERM)
- goto out;
- else
- goto err;
- }
-
+ res = bind_sock(test->domain, test->type, test->ip, test->port,
+ test->port_retry);
+ if (res > 0 && test->result == res)
+ goto out;
- if (test->result != SUCCESS)
- goto err;
-
- goto out;
err:
err = -1;
out:
--
2.30.2
On Thu, Jan 6, 2022 at 5:20 AM <[email protected]> wrote:
>
> From: Menglong Dong <[email protected]>
>
> The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in
> __inet_bind() is not handled properly. While the return value
> is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and
> exit:
> exit:
>
> err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
> if (err) {
> inet->inet_saddr = inet->inet_rcv_saddr = 0;
> goto out_release_sock;
> }
>
> Let's take UDP for example and see what will happen. For UDP
> socket, it will be added to 'udp_prot.h.udp_table->hash' and
> 'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port()
> called success. If 'inet->inet_rcv_saddr' is specified here,
> then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong
> to (because inet_saddr is changed to 0), and UDP packet received
> will not be passed to this sock. If 'inet->inet_rcv_saddr' is not
> specified here, the sock will work fine, as it can receive packet
> properly, which is wired, as the 'bind()' is already failed.
>
> To undo the get_port() operation, introduce the 'put_port' field
> for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP
> proto, it is udp_lib_unhash(); For icmp proto, it is
> ping_unhash().
>
> Therefore, after sys_bind() fail caused by
> BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which
> means that it can try to be binded to another port.
>
> The second patch use C99 initializers in test_sock.c
>
> The third patch is the selftests for this modification.
>
> Changes since v4:
> - use C99 initializers in test_sock.c before adding the test case
Thanks for the fix. Applied.
In the future please make sure to tag your patches with
[PATCH bpf-next]
so that BPF CI can properly test it right away.