2023-01-17 17:53:36

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH] bpf: security enhancement by limiting the offensive eBPF helpers



On 1/17/23 7:12 AM, WritePaper wrote:
> The bpf_send_singal and bpf_override_return is similar to
> bpf_write_user and can affect userspace processes. Thus, these two
> helpers should also be constraint by security lockdown.
>
> Signed-off-by: WritePaper <[email protected]>
> ---
> include/linux/security.h | 3 +++
> kernel/trace/bpf_trace.c | 6 ++++--
> 2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 5b67f208f..cb90b2860 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -123,6 +123,9 @@ enum lockdown_reason {
> LOCKDOWN_DEBUGFS,
> LOCKDOWN_XMON_WR,
> LOCKDOWN_BPF_WRITE_USER,
> + LOCKDOWN_BPF_SEND_SIGNAL,
> + LOCKDOWN_BPF_OVERRIDE_RETURN,
> + LOCKDOWN_OFFENSIVE_BPF_MAX,

LOCKDOWN_OFFENSIVE_BPF_MAX is not used.

> LOCKDOWN_DBG_WRITE_KERNEL,
> LOCKDOWN_RTAS_ERROR_INJECTION,
> LOCKDOWN_INTEGRITY_MAX,
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c8..3a80f4b6f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1463,7 +1463,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_cgrp_storage_delete_proto;
> #endif
> case BPF_FUNC_send_signal:
> - return &bpf_send_signal_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_proto;

You should add the same security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL)
check to the bpf_send_signal_thread() helper below.

> case BPF_FUNC_send_signal_thread:
> return &bpf_send_signal_thread_proto;
> case BPF_FUNC_perf_event_read_value:
> @@ -1531,7 +1532,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_stack_proto;
> #ifdef CONFIG_BPF_KPROBE_OVERRIDE
> case BPF_FUNC_override_return:
> - return &bpf_override_return_proto;
> + return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
> + NULL : &bpf_override_return_proto;
> #endif
> case BPF_FUNC_get_func_ip:
> return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?


2023-01-18 02:48:19

by Yi He

[permalink] [raw]
Subject: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

The bpf_send_signal, bpf_send_signal_thread and bpf_override_return
helpers are similar to bpf_write_user and can affect userspace processes.
Thus, these three helpers should also be restricted by security lockdown.

Signed-off-by: Yi He <[email protected]>
---
V1 -> V2: add security lockdown to bpf_send_signal_thread and remove
the unused LOCKDOWN_OFFENSIVE_BPF_MAX.

include/linux/security.h | 2 ++
kernel/trace/bpf_trace.c | 9 ++++++---
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 5b67f208f..42420e620 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -123,6 +123,8 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_BPF_SEND_SIGNAL,
+ LOCKDOWN_BPF_OVERRIDE_RETURN,
LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_RTAS_ERROR_INJECTION,
LOCKDOWN_INTEGRITY_MAX,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c8..fdb94868d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_delete_proto;
#endif
case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_proto;
case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
@@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
- return &bpf_override_return_proto;
+ return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
+ NULL : &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
--
2.25.1

2023-01-18 06:14:39

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers



On 1/17/23 4:54 PM, Yi He wrote:
> The bpf_send_singal, bpf_send_singal_thread and bpf_override_return
> is similar to bpf_write_user and can affect userspace processes.
> Thus, these three helpers should also be restricted by security lockdown.
>
> Signed-off-by: Yi He <[email protected]>
> ---
> V1 -> V2: add security lockdown to bpf_send_singal_thread and remove
> the unused LOCKDOWN_OFFENSIVE_BPF_MAX.
>
> include/linux/security.h | 2 ++
> kernel/trace/bpf_trace.c | 9 ++++++---
> 2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 5b67f208f..42420e620 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -123,6 +123,8 @@ enum lockdown_reason {
> LOCKDOWN_DEBUGFS,
> LOCKDOWN_XMON_WR,
> LOCKDOWN_BPF_WRITE_USER,
> + LOCKDOWN_BPF_SEND_SIGNAL,
> + LOCKDOWN_BPF_OVERRIDE_RETURN,
> LOCKDOWN_DBG_WRITE_KERNEL,
> LOCKDOWN_RTAS_ERROR_INJECTION,
> LOCKDOWN_INTEGRITY_MAX,

Also, do you need to add an entry in lockdown_reasons in
security/security.c?

Also add [email protected] so security experts can
chime in as well.


> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c8..fdb94868d 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_cgrp_storage_delete_proto;
> #endif
> case BPF_FUNC_send_signal:
> - return &bpf_send_signal_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_proto;
> case BPF_FUNC_send_signal_thread:
> - return &bpf_send_signal_thread_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_thread_proto;
> case BPF_FUNC_perf_event_read_value:
> return &bpf_perf_event_read_value_proto;
> case BPF_FUNC_get_ns_current_pid_tgid:
> @@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_stack_proto;
> #ifdef CONFIG_BPF_KPROBE_OVERRIDE
> case BPF_FUNC_override_return:
> - return &bpf_override_return_proto;
> + return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
> + NULL : &bpf_override_return_proto;
> #endif
> case BPF_FUNC_get_func_ip:
> return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?

2023-01-18 10:51:38

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

On 1/18/23 1:54 AM, Yi He wrote:
> The bpf_send_singal, bpf_send_singal_thread and bpf_override_return
> is similar to bpf_write_user and can affect userspace processes.
> Thus, these three helpers should also be restricted by security lockdown.
>
> Signed-off-by: Yi He <[email protected]>
> ---
> V1 -> V2: add security lockdown to bpf_send_singal_thread and remove
> the unused LOCKDOWN_OFFENSIVE_BPF_MAX.
>
> include/linux/security.h | 2 ++
> kernel/trace/bpf_trace.c | 9 ++++++---
> 2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 5b67f208f..42420e620 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -123,6 +123,8 @@ enum lockdown_reason {
> LOCKDOWN_DEBUGFS,
> LOCKDOWN_XMON_WR,
> LOCKDOWN_BPF_WRITE_USER,
> + LOCKDOWN_BPF_SEND_SIGNAL,
> + LOCKDOWN_BPF_OVERRIDE_RETURN,
> LOCKDOWN_DBG_WRITE_KERNEL,
> LOCKDOWN_RTAS_ERROR_INJECTION,
> LOCKDOWN_INTEGRITY_MAX,

I'm not applying this.. i) this means by default you effectively remove these
helpers from existing users in the wild given integrity mode is default for
secure boot, but also ii) should we lock-down and remove the ability for other
privileged entities like processes to send signals, seccomp to ret_kill, ptrace,
etc given they all "can affect userspace processes". For the other one, check
out already existing FUNCTION_ERROR_INJECTION kernel config.

> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c8..fdb94868d 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_cgrp_storage_delete_proto;
> #endif
> case BPF_FUNC_send_signal:
> - return &bpf_send_signal_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_proto;
> case BPF_FUNC_send_signal_thread:
> - return &bpf_send_signal_thread_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_thread_proto;
> case BPF_FUNC_perf_event_read_value:
> return &bpf_perf_event_read_value_proto;
> case BPF_FUNC_get_ns_current_pid_tgid:
> @@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_stack_proto;
> #ifdef CONFIG_BPF_KPROBE_OVERRIDE
> case BPF_FUNC_override_return:
> - return &bpf_override_return_proto;
> + return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
> + NULL : &bpf_override_return_proto;
> #endif
> case BPF_FUNC_get_func_ip:
> return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
>

2023-01-18 12:39:15

by Yi He

[permalink] [raw]
Subject: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

The bpf_send_signal, bpf_send_signal_thread and bpf_override_return
helpers are similar to bpf_write_user and can affect userspace processes.
Thus, these three helpers should also be restricted by security lockdown.

Signed-off-by: Yi He <[email protected]>
---

Thanks for your feedback.

This patch aims to mitigate the offensive eBPF problem, which has been discussed since 2019 [1]. Recently, we found that enabling eBPF in a container environment can lead to container escape or cross-node attacks (which may compromise multiple VMs) in Kubernetes [2]. Since lots of eBPF-based tools are used in containers, multiple containers have the CAP_SYS_ADMIN capability needed by eBPF, which may be abused by untrusted eBPF code.

We are still working on a better, fine-grained eBPF permission model which adds capability filter bits to control the permissions of different eBPF program types and helper functions of a process [3].

Security lockdown seems to be a simple way to mitigate this problem. It only restricts the offensive features and leaves enabled the other eBPF features needed by benign eBPF programs such as Cilium (which do not use these offensive features but only need bpf_read_user).

> I'm not applying this.. i) this means by default you effectively remove these
> helpers from existing users in the wild given integrity mode is default for
> secure boot, but also ii) should we lock-down and remove the ability for other
> privileged entities like processes to send signals, seccomp to ret_kill, ptrace,
> etc given they all "can affect userspace processes"

It does not affect the ability of other privileged processes (e.g., via ptrace) to kill a process. Seccomp uses classic BPF and does not use this eBPF helper [4].

> check out already existing FUNCTION_ERROR_INJECTION kernel config.
We do not think the FUNCTION_ERROR_INJECTION config can solve this problem, as this option is enabled by default in many Linux distributions such as Debian/Ubuntu. All syscalls are in the error-injection allowlist and can be attacked by evil eBPF via the eBPF override-return helper.

We hope you can rethink this problem.

[1]. J. Dileo. Evil eBPF: Practical Abuses of an In-Kernel Bytecode Runtime. DEFCON 27
[2]. https://rolandorange.zone/report.html
[3]. https://lore.kernel.org/bpf/CAADnVQK4ucv=LugqZ3He9ubwdxDu6ohaBKr2E=TX0UT65+7WpQ@mail.gmail.com/T/
[4]. https://elixir.bootlin.com/linux/v6.2-rc4/source/kernel/seccomp.c#L1304


V1 -> V2: add security lockdown to bpf_send_signal_thread and remove
the unused LOCKDOWN_OFFENSIVE_BPF_MAX.

include/linux/security.h | 2 ++
kernel/trace/bpf_trace.c | 9 ++++++---
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 5b67f208f..42420e620 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -123,6 +123,8 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_BPF_SEND_SIGNAL,
+ LOCKDOWN_BPF_OVERRIDE_RETURN,
LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_RTAS_ERROR_INJECTION,
LOCKDOWN_INTEGRITY_MAX,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c8..fdb94868d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_delete_proto;
#endif
case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_proto;
case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
@@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
- return &bpf_override_return_proto;
+ return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
+ NULL : &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
--
2.25.1

2023-01-18 15:47:43

by Djalal Harouni

[permalink] [raw]
Subject: Re: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

On Wed, Jan 18, 2023 at 1:38 PM Yi He <[email protected]> wrote:
[...]
> Thanks for your feedback.
>
> This patch aims to mitigate the offensive eBPF problem which has been dicussed since 2019 [1]. Recently, we find that enable eBPF in container environemnt can lead to container escape or cross-nodes attacks (which may compromise mutiple VMs) in the Kubernetes [2]. Since lots of eBPF based tools are used in containers, mutiple containers have the CAP_SYS_ADMIN needed by eBPF which may be abused by untrusted eBPF code.

Then the solution should be toward restricting eBPF in containers; there are
already sysctl, per-process seccomp, and LSM + bpf LSM for that.

...
> > I'm not applying this.. i) this means by default you effectively remove these
> > helpers from existing users in the wild given integrity mode is default for
> > secure boot, but also ii) should we lock-down and remove the ability for other
> > privileged entities like processes to send signals, seccomp to ret_kill, ptrace,
> > etc given they all "can affect userspace processes"
>
> It does not affect other privielge processes (e.g., ptrace) to kill process. Seccomp is classic bpf does not use this eBPF helper [4].

Those are more or less same as bpf sending signal. Supervisors are using
seccomp to ret kill process and/or sending signals. Where will you draw the
line? should we go restrict those too? IMHO this does not relate to lockdown.

This reasoning will kill any effort to improve sandbox mechanisms that are
moving some functionality from seccomp ret kill to a more flexible and
transparent bpf-LSM model where privileged installs the sandbox. Actually,
we are already doing this and beside eBPF flexibility and transparency
(change policy at runtime without restart) from a _user perspective_
I don't see that much difference between a seccomp kill and ebpf signal.

Thanks!

2023-01-19 05:28:28

by Yi He

[permalink] [raw]
Subject: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

The bpf_send_signal, bpf_send_signal_thread and bpf_override_return
helpers are similar to bpf_write_user and can affect userspace processes.
Thus, these three helpers should also be restricted by security lockdown.

Signed-off-by: Yi He <[email protected]>
---

Thanks for your reply.

I have studied this problem for months. I would like to give more details to
clarify why these two helpers can break INTEGRITY and should be locked down.

First, these helpers are only for eBPF tracing programs. LSM-bpf and seccomp do
not need them. The documentation says the two functions are experimental.
Currently, eBPF products (e.g., Cilium, Falco) seldom use them, but evil eBPF
can abuse them.

Second, override_return is similar to bpf_write_user and can definitely break
INTEGRITY by altering the return code of other processes' system calls or of
kernel functions (via KProbe).

> Then solution should be toward restricting eBPF in container, there is already
> sysctl, per process seccomp, LSM + bpf LSM for that.
Yes, the solution is for restricting eBPF in containers. But fine-grained access
control is required, such as assigning different eBPF privileges to various containers,
rather than just disabling eBPF in a container.

The mechanisms you mentioned do not properly solve the problem.
sysctl can only prevent unprivileged
users from accessing eBPF via the kernel.unprivileged_bpf_disabled flag. The untrusted eBPF
programs are installed by privileged users inside a container but can harm the whole system and
other shared-kernel containers.
seccomp can likewise only block the bpf system call, totally disabling eBPF, while we may
need to selectively enable the benign features of eBPF and disallow the offensive features
which may be abused.
LSM + bpf LSM can implement this functionality. However, it is difficult to identify
a process from a container [1], as at many LSM hooks we can only get a process's pid and
name, which can be forged by a malicious program. A correct way is to use the inode number
to set policy for benign processes. Moreover, the overhead of LSM bpf is unacceptable.

[1]. https://blog.doyensec.com/2022/10/11/ebpf-bypass-security-monitoring.html

> Those are more or less same as bpf sending signal. Supervisors are using
> seccomp to ret kill process and/or sending signals. Where will you draw the
> line? should we go restrict those too? IMHO this does not relate to lockdown.
> I don't see that much difference between a seccomp kill and ebpf signal.

bpf_send_signal is different from other signal-sending functions as it
enables an eBPF tracing program in a container to kill any process
(even a privileged process) on the host or in other containers.
Supervisors and seccomp can only kill their child processes. Other signal-sending
mechanisms do not need to be restricted as they cannot be used inside a container to kill
processes outside of the container.

> This reasoning will kill any effort to improve sandbox mechanisms that are
> moving some functionality from seccomp ret kill to a more flexible and
> transparent bpf-LSM model where privileged installs the sandbox. Actually,
> we are already doing this and beside eBPF flexibility and transparency
> (change policy at runtime without restart) from a _user perspective_
We will try to implement alternative mechanisms for constrained eBPF
features only, since LSM-bpf has shortcomings in both flexibility and
performance.

This patch is only for blocking the offensive features of eBPF and preventing them from
affecting the INTEGRITY of the container, given that evil eBPF can abuse these
helpers to affect any process running inside or outside of the container,
sharing the same kernel.

[1]. https://github.com/Gui774ume/krie/blob/master/ebpf/krie/hooks/lsm.h

include/linux/security.h | 2 ++
kernel/trace/bpf_trace.c | 9 ++++++---
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 5b67f208f..42420e620 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -123,6 +123,8 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_BPF_SEND_SIGNAL,
+ LOCKDOWN_BPF_OVERRIDE_RETURN,
LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_RTAS_ERROR_INJECTION,
LOCKDOWN_INTEGRITY_MAX,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c8..fdb94868d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_delete_proto;
#endif
case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_proto;
case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
+ return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
+ NULL : &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
@@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
- return &bpf_override_return_proto;
+ return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
+ NULL : &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
--
2.25.1

2023-01-20 01:48:02

by KP Singh

[permalink] [raw]
Subject: Re: [PATCH V2] bpf: security enhancement by limiting the offensive eBPF helpers

On Thu, Jan 19, 2023 at 5:25 AM Yi He <[email protected]> wrote:
>
> The bpf_send_singal, bpf_send_singal_thread and bpf_override_return
> is similar to bpf_write_user and can affect userspace processes.
> Thus, these three helpers should also be restricted by security lockdown.
>
> Signed-off-by: Yi He <[email protected]>
> ---
>
> Thanks for you reply.
>
> I have studied this problem for months. I would like to give more details to
> clarify why these two helpers can break the INTEGRITY and should be lockdown.
>
> First, this helpers are only for eBPF tracing programs. LSM-bpf and seccomp do
> not need them. The documents say the two functions are experimental.
> Now the eBPF products (e.g., Cillium, Falco) seldom use them but the evil eBPF
> can abuse them.

This has got nothing to do with breaking integrity.

>
> Second, override_return is similar to bpf_write_user can defintely break the
> INTEGRITY by altering other processes' system call or kernel functions
> (KProbe)'s return code.

Then any fault injection can break integrity? This is another can of
worms waiting to be opened here. It's not productive and doesn't help.
Let's focus on a better / flexible MAC policy here.

>
> > Then solution should be toward restricting eBPF in container, there is already
> > sysctl, per process seccomp, LSM + bpf LSM for that.
> Yes, the solution is for restricting eBPF in container. But a fine-gained access
> control is required, such as assigning different eBPF privilege to various containers,
> rather than just disable eBPF in a container.
>
You might want to reconsider your threat model about allowing partial
unprivileged access to eBPF, even if it's partial access. What you are
eventually going to run into is side channel attacks (please look at
the recent side channel issues exploited with unprivileged eBPF and
you don't need "offensive helpers" for these).

> The mechanisms you mententioned do not properly sovle the problem.
> sysctl can only disable the unprivielge
> users to access eBPF via the kernel.unprivileged_bpf_disabled flag. The untrusted eBPF
> are installed by privielge users inside a container but can harm the whole system and
> other shared-kernel containers.
> seccomp also can only disable the bpf system call to totally disable eBPF while we may
> need to selectively enable the benign features of eBPF and disallow the offensive features
> which may be abused.
> LSM + bpf LSM can implement this functionality. However, it is difficult to identify
> a process from a container [1] as at many LSM hooks, we can only get a process's pid and

It's not difficult. The core primitive is that the container manager
generates a unique ID per process and sets it on the task blob / local
storage (this can be flexible on what a definition of a container is
in your runtime [i.e. a mix and match of different namespace ids]).
This can then be passed on to the subprocesses via LSM hooks.

> name which can be forged by the mailicous program. A correct way is to use the inode number
> to set policy for benign processes. Moreover, the LSM bpf's overhead is unacceptable.

BPF LSM is the worst case, but all LSMs have overhead. I hear you and
here's my series to address this overhead.

https://lore.kernel.org/bpf/[email protected]/T/#t
[hasn't been delivered fully by vger yet] so I resent it as
https://lore.kernel.org/linux-security-module/[email protected]/T/#t
and, as of now, between them they have most of the patches.

>
> [1]. https://blog.doyensec.com/2022/10/11/ebpf-bypass-security-monitoring.html
>
> > Those are more or less same as bpf sending signal. Supervisors are using
> > seccomp to ret kill process and/or sending signals. Where will you draw the
> > line? should we go restrict those too? IMHO this does not relate to lockdown.
> > I don't see that much difference between a seccomp kill and ebpf signal.
>
> The bpf_send_singal is different to any other signal sending functions as it
> enables a eBPF tracing program from a container to kill any processes
> (even the privielge proceess) of the host or other containers.
> Supervisors and seccomp can only kill its child process. Other signal sending
> do not need to be restricted as they can not be used inside a container to kill
> any processes outside of a container.
>
> > This reasoning will kill any effort to improve sandbox mechanisms that are
> > moving some functionality from seccomp ret kill to a more flexible and
> > transparent bpf-LSM model where privileged installs the sandbox. Actually,
> > we are already doing this and beside eBPF flexibility and transparency
> > (change policy at runtime without restart) from a _user perspective_
> We will try to implement alternative mechanisms for constrained eBPF
> features only since the LSM-bpf have shortages in both flexibility and
> performance.
>
> This patch is only for blocking the offensive features of eBPF and avoiding them
> affecting the INTEGRITY of the container, given that the evil eBPF can abuse these
> helpers to affect any processes running in inside or outside of the container,
> sharing the same kernel.

This has got nothing to do with kernel integrity. Please don't try to
shoe-horn your policy requirements into integrity.

If you really want to go with your threat model and allow partially
available unprivileged BPF, this should be done via BPF LSM. The
kernel should not try to adjust its definition of integrity based on a
rather questionable threat model. Let's fix the LSM overhead and
unblock flexibility for your use-case.

- KP


>
> [1]. https://github.com/Gui774ume/krie/blob/master/ebpf/krie/hooks/lsm.h
>
> include/linux/security.h | 2 ++
> kernel/trace/bpf_trace.c | 9 ++++++---
> 2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 5b67f208f..42420e620 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -123,6 +123,8 @@ enum lockdown_reason {
> LOCKDOWN_DEBUGFS,
> LOCKDOWN_XMON_WR,
> LOCKDOWN_BPF_WRITE_USER,
> + LOCKDOWN_BPF_SEND_SIGNAL,
> + LOCKDOWN_BPF_OVERRIDE_RETURN,
> LOCKDOWN_DBG_WRITE_KERNEL,
> LOCKDOWN_RTAS_ERROR_INJECTION,
> LOCKDOWN_INTEGRITY_MAX,
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c8..fdb94868d 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1463,9 +1463,11 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_cgrp_storage_delete_proto;
> #endif
> case BPF_FUNC_send_signal:
> - return &bpf_send_signal_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_proto;
> case BPF_FUNC_send_signal_thread:
> - return &bpf_send_signal_thread_proto;
> + return security_locked_down(LOCKDOWN_BPF_SEND_SIGNAL) < 0 ?
> + NULL : &bpf_send_signal_thread_proto;
> case BPF_FUNC_perf_event_read_value:
> return &bpf_perf_event_read_value_proto;
> case BPF_FUNC_get_ns_current_pid_tgid:
> @@ -1531,7 +1533,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_stack_proto;
> #ifdef CONFIG_BPF_KPROBE_OVERRIDE
> case BPF_FUNC_override_return:
> - return &bpf_override_return_proto;
> + return security_locked_down(LOCKDOWN_BPF_OVERRIDE_RETURN) < 0 ?
> + NULL : &bpf_override_return_proto;
> #endif
> case BPF_FUNC_get_func_ip:
> return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
> --
> 2.25.1
>