2023-01-14 09:23:56

by Roland

[permalink] [raw]
Subject: [PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT

In container environment, ebpf helpers could be used maliciously to
leak information, DOS, even escape from containers.
CONFIG_BPF_HELPER_STRICT is as a mitigation of it.
Related Link: https://rolandorange.zone/report.html

Signed-off-by: Roland <[email protected]>
---
include/linux/kernel.h | 7 ++++++
include/linux/sched.h | 4 +++-
include/uapi/linux/bpf.h | 4 ++++
include/uapi/linux/sched.h | 4 ++++
kernel/bpf/Kconfig | 6 +++++
kernel/bpf/syscall.c | 48 +++++++++++++++++++++++++++++++++++---
kernel/fork.c | 13 +++++++++++
kernel/trace/bpf_trace.c | 29 +++++++++++++++++++++--
8 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fe6efb24d..61f7fcbc9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -509,3 +509,10 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))
#endif
+
+#ifdef CONFIG_BPF_HELPER_STRICT
+# define BPF_PROBE_WRITE_BIT 1
+# define BPF_PROBE_READ_BIT 2
+# define BPF_SEND_SIGNAL_BIT 3
+# define BPF_OVERRIDE_RETURN_BIT 4
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffb6eb55c..a3ec52056 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -760,7 +760,9 @@ struct task_struct {
/* Per task flags (PF_*), defined further below: */
unsigned int flags;
unsigned int ptrace;
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+ unsigned int bpf_helper_bitfield;
+#endif
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51b9aa640..99a90d0f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -900,6 +900,7 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_HELPER_BITS_SET,
};

enum bpf_map_type {
@@ -1326,6 +1327,9 @@ union bpf_attr {
* to using 5 hash functions).
*/
__u64 map_extra;
+#ifdef CONFIG_BPF_HELPER_STRICT
+ __u32 security_helper_bits;
+#endif
};

struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ce..c2fd463be 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -43,6 +43,10 @@
*/
#define CLONE_NEWTIME 0x00000080 /* New time namespace */

+#ifdef CONFIG_BPF_HELPER_STRICT
+#define CLONE_BITFIELD 0x00000040 /* set if bpf_helper_bitfield shared between processes */
+#endif
+
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 2dfe1079f..c2c2fcf76 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -99,4 +99,10 @@ config BPF_LSM

If you are unsure how to answer this question, answer N.

+config BPF_HELPER_STRICT
+ bool "Enable BPF HELPER Check bits"
+ depends on BPF_SYSCALL
+ help
+ Enable several check bits for bpf helpers' security improvements.
+
+ BPF_HELPER_STRICT restricts the function of helpers.
+ Currently it can be used for restrict override return,
+ read, write, and send signal.
endmenu # "BPF subsystem"
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7b373a5e8..2f05ea50f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,6 +68,45 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
#undef BPF_LINK_TYPE
};

+#ifdef CONFIG_BPF_HELPER_STRICT
+static __always_inline int HelperWrite(unsigned int bits)
+{
+ return ((unsigned int)bits & BPF_PROBE_WRITE_BIT) != 0;
+}
+static __always_inline int HelperRead(unsigned int bits)
+{
+ return ((unsigned int)bits & BPF_PROBE_READ_BIT) != 0;
+}
+static __always_inline int HelperSendSignal(unsigned int bits)
+{
+ return ((unsigned int)bits & BPF_SEND_SIGNAL_BIT) != 0;
+}
+static __always_inline int HelperOverrideReturn(unsigned int bits)
+{
+ return ((unsigned int)bits & BPF_OVERRIDE_RETURN_BIT) != 0;
+}
+int bpf_set_security_helper_bits(union bpf_attr *attr)
+{
+ int res;
+ unsigned int bits_to_set;
+ unsigned int expected_bpf_helper_bitfield = 0;
+
+ bits_to_set = attr->security_helper_bits;
+
+ if (HelperWrite(bits_to_set))
+ expected_bpf_helper_bitfield |= 1 << BPF_PROBE_WRITE_BIT;
+ if (HelperRead(bits_to_set))
+ expected_bpf_helper_bitfield |= 1 << BPF_PROBE_READ_BIT;
+ if (HelperSendSignal(bits_to_set))
+ expected_bpf_helper_bitfield |= 1 << BPF_SEND_SIGNAL_BIT;
+ if (HelperOverrideReturn(bits_to_set))
+ expected_bpf_helper_bitfield |= 1 << BPF_OVERRIDE_RETURN_BIT;
+
+ current->bpf_helper_bitfield = expected_bpf_helper_bitfield;
+ res = 0;
+ return res;
+}
+#endif
/*
* If we're handed a bigger struct than we know of, ensure all the unknown bits
* are 0 - i.e. new user-space does not rely on any kernel feature extensions
@@ -4913,7 +4952,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
union bpf_attr attr;
bool capable;
int err;
-
+
capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;

/* Intent here is for unprivileged_bpf_disabled to block key object
@@ -4925,7 +4964,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
* and other operations.
*/
if (!capable &&
- (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
+ (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD || cmd == BPF_HELPER_BITS_SET))
return -EPERM;

err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
@@ -4938,7 +4977,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;

- err = security_bpf(cmd, &attr, size);
+ err = security_bpf(cmd, &attr, size);
if (err < 0)
return err;

@@ -5056,6 +5095,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_HELPER_BITS_SET:
+ err = bpf_set_security_helper_bits(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa..6168da0b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1998,6 +1998,10 @@ static __latent_entropy struct task_struct *copy_process(
const u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;

+#ifdef CONFIG_BPF_HELPER_STRICT
+ unsigned int bitfield = current->bpf_helper_bitfield;
+#endif
+
/*
* Don't allow sharing the root directory with processes in a different
* namespace
@@ -2102,6 +2106,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;

+
ftrace_graph_init_task(p);

rt_mutex_init_task(p);
@@ -2490,6 +2495,14 @@ static __latent_entropy struct task_struct *copy_process(

copy_oom_score_adj(clone_flags, p);

+#ifdef CONFIG_BPF_HELPER_STRICT
+ /* Only if explicit set CLONE_BITFIELD or
+ * the forked process has both CAP_BPF and CAP_SYS_ADMIN,
+ * we will set the bitfield
+ */
+ p->bpf_helper_bitfield = (clone_flags & CLONE_BITFIELD) ? bitfield : 0;
+ if (capable(CAP_BPF) && capable(CAP_SYS_ADMIN))
+ p->bpf_helper_bitfield = bitfield;
+#endif
return p;

bad_fork_cancel_cgroup:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1ed08967f..5c4232d45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -39,6 +39,16 @@
#define bpf_event_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

+#ifdef CONFIG_BPF_HELPER_STRICT
+static bool check_bpf_bitfield(unsigned int flags)
+{
+ unsigned int bits = current->bpf_helper_bitfield;
+
+ if (!(bits & (1 << flags)))
+ return false;
+
+ return true;
+}
+#endif
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -145,6 +155,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_OVERRIDE_RETURN_BIT)))
+ return -EPERM;
+#endif
regs_set_return_value(regs, rc);
override_function_with_return(regs);
return 0;
@@ -162,8 +176,8 @@ static const struct bpf_func_proto bpf_override_return_proto = {
static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
- int ret;

+ int ret;
ret = copy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
@@ -287,6 +301,10 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
const void *, unsafe_ptr)
{
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_PROBE_READ_BIT)))
+ return -EPERM;
+#endif
if ((unsigned long)unsafe_ptr < TASK_SIZE) {
return bpf_probe_read_user_common(dst, size,
(__force void __user *)unsafe_ptr);
@@ -338,7 +356,10 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
* state, when the task or mm are switched. This is specifically
* required to prevent the use of temporary mm.
*/
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_PROBE_WRITE_BIT)))
+ return -EPERM;
+#endif
if (unlikely(in_interrupt() ||
current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
@@ -843,6 +864,10 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* permitted in order to send signal to the current
* task.
*/
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_SEND_SIGNAL_BIT)))
+ return -EPERM;
+#endif
if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
--
2.25.1


2023-01-15 21:42:43

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT

On Fri, Jan 13, 2023 at 11:53 PM Roland <[email protected]> wrote:
>
> In container environment, ebpf helpers could be used maliciously to
> leak information, DOS, even escape from containers.
> CONFIG_BPF_HELPER_STRICT is as a mitigation of it.
> Related Link: https://rolandorange.zone/report.html

The link is arguing that a process with CAP_SYS_ADMIN permissions
can read memory of user processes, leak kernel addresses, etc.
And this is somehow an issue with bpf helpers?
and your suggested "temporary mitigation" is to CONFIG_BPF=n ?
While this patch is a "proper fix" ?
Sorry, but please stay with your "temporary mitigation" forever.

2023-01-16 22:16:20

by KP Singh

[permalink] [raw]
Subject: Re: [PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT

On Sun, Jan 15, 2023 at 10:32 PM Alexei Starovoitov
<[email protected]> wrote:
>
> On Fri, Jan 13, 2023 at 11:53 PM Roland <[email protected]> wrote:
> >
> > In container environment, ebpf helpers could be used maliciously to
> > leak information, DOS, even escape from containers.
> > CONFIG_BPF_HELPER_STRICT is as a mitigation of it.
> > Related Link: https://rolandorange.zone/report.html
>
> The link is arguing that a process with CAP_SYS_ADMIN permissions
> can read memory of user processes, leak kernel addresses, etc.
> And this is somehow an issue with bpf helpers?
> and your suggested "temporary mitigation" is to CONFIG_BPF=n ?
> While this patch is a "proper fix" ?
> Sorry, but please stay with your "temporary mitigation" forever.

100% agreeing with Alexei here, if you are running your containers
with CAP_SYS_ADMIN there are a lot of other things you need to worry
about than just BPF helpers. You need to revisit your threat model and
consider not using CAP_SYS_ADMIN and more fine grained policies using
Mandatory Access Control.

2023-01-17 16:36:14

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT

On Mon, Jan 16, 2023 at 10:25 PM Pwn Kernel <[email protected]> wrote:
>
> Thank you for your suggestion so much, we understand it.
>
> In the other hand, Do you think if add several lockdown reasons like LOCKDOWN_BPF_SEND_SIGNAL and LOCKDOWN_BPF_OVERRIDE_RETURN will be better or acceptable for this?
>
> Thanks for your advice!

Please do not top post and don't use html in your replies.
Please learn the process first:
https://docs.kernel.org/process/development-process.html