From: KP Singh <[email protected]>
v3 -> v4:
* Fix a memory leak noticed by Daniel.
v2 -> v3:
* bpf_trampoline_update_progs -> bpf_trampoline_get_progs + const
qualification.
* Typos in commit messages.
* Added Andrii's Acks.
v1 -> v2:
* Adressed Andrii's feedback.
* Fixed a bug that Alexei noticed about nop generation.
* Rebase.
This was brought up in the KRSI v4 discussion and found to be useful
both for security and tracing programs.
https://lore.kernel.org/bpf/[email protected]/
The modify_return programs are allowed for security hooks (with an
extra CAP_MAC_ADMIN check) and functions whitelisted for error
injection (ALLOW_ERROR_INJECTION).
The "security_" check is expected to be cleaned up with the KRSI patch
series.
Here is an example of how a fmod_ret program behaves:
int func_to_be_attached(int a, int b)
{ <--- do_fentry
do_fmod_ret:
<update ret by calling fmod_ret>
if (ret != 0)
goto do_fexit;
original_function:
<side_effects_happen_here>
} <--- do_fexit
ALLOW_ERROR_INJECTION(func_to_be_attached, ERRNO)
The fmod_ret program attached to this function can be defined as:
SEC("fmod_ret/func_to_be_attached")
int BPF_PROG(func_name, int a, int b, int ret)
{
// This will skip the original function logic.
return -1;
}
KP Singh (7):
bpf: Refactor trampoline update code
bpf: JIT helpers for fmod_ret progs
bpf: Introduce BPF_MODIFY_RETURN
bpf: Attachment verification for BPF_MODIFY_RETURN
tools/libbpf: Add support for BPF_MODIFY_RETURN
bpf: Add test ops for BPF_PROG_TYPE_TRACING
bpf: Add selftests for BPF_MODIFY_RETURN
arch/x86/net/bpf_jit_comp.c | 279 +++++++++++++-----
include/linux/bpf.h | 24 +-
include/uapi/linux/bpf.h | 1 +
kernel/bpf/bpf_struct_ops.c | 10 +-
kernel/bpf/btf.c | 27 +-
kernel/bpf/syscall.c | 1 +
kernel/bpf/trampoline.c | 65 ++--
kernel/bpf/verifier.c | 32 ++
kernel/trace/bpf_trace.c | 1 +
net/bpf/test_run.c | 57 +++-
tools/include/uapi/linux/bpf.h | 1 +
tools/lib/bpf/libbpf.c | 4 +
.../selftests/bpf/prog_tests/fentry_fexit.c | 12 +-
.../selftests/bpf/prog_tests/fentry_test.c | 14 +-
.../selftests/bpf/prog_tests/fexit_test.c | 69 ++---
.../selftests/bpf/prog_tests/modify_return.c | 65 ++++
.../selftests/bpf/progs/modify_return.c | 49 +++
17 files changed, 524 insertions(+), 187 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/modify_return.c
create mode 100644 tools/testing/selftests/bpf/progs/modify_return.c
--
2.20.1
From: KP Singh <[email protected]>
* Split the invoke_bpf program to prepare for special handling of
fmod_ret programs introduced in a subsequent patch.
* Move the definition of emit_cond_near_jump and emit_nops as they are
needed for fmod_ret.
* Refactor branch target alignment into its own generic helper function
i.e. emit_align.
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
arch/x86/net/bpf_jit_comp.c | 148 +++++++++++++++++++++---------------
1 file changed, 85 insertions(+), 63 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 15c7d28bc05c..d6349e930b06 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1361,35 +1361,95 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
-(stack_size - i * 8));
}
+static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog *p, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!p->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) p->insnsi >> 32,
+ (u32) (long) p->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, p->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+ (u32) (long) p);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+
+ *pprog = prog;
+ return 0;
+}
+
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+ unsigned int i, noplen;
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ while (len > 0) {
+ noplen = len;
+
+ if (noplen > ASM_NOP_MAX)
+ noplen = ASM_NOP_MAX;
+
+ for (i = 0; i < noplen; i++)
+ EMIT1(ideal_nops[noplen][i]);
+ len -= noplen;
+ }
+
+ *pprog = prog;
+}
+
+static void emit_align(u8 **pprog, u32 align)
+{
+ u8 *target, *prog = *pprog;
+
+ target = PTR_ALIGN(prog, align);
+ if (target != prog)
+ emit_nops(&prog, target - prog);
+
+ *pprog = prog;
+}
+
+static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + 2 + 4);
+ if (!is_simm32(offset)) {
+ pr_err("Target %p is out of range\n", func);
+ return -EINVAL;
+ }
+ EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
+ *pprog = prog;
+ return 0;
+}
+
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size)
{
+ int i;
u8 *prog = *pprog;
- int cnt = 0, i;
for (i = 0; i < tp->nr_progs; i++) {
- if (emit_call(&prog, __bpf_prog_enter, prog))
- return -EINVAL;
- /* remember prog start time returned by __bpf_prog_enter */
- emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
-
- /* arg1: lea rdi, [rbp - stack_size] */
- EMIT4(0x48, 0x8D, 0x7D, -stack_size);
- /* arg2: progs[i]->insnsi for interpreter */
- if (!tp->progs[i]->jited)
- emit_mov_imm64(&prog, BPF_REG_2,
- (long) tp->progs[i]->insnsi >> 32,
- (u32) (long) tp->progs[i]->insnsi);
- /* call JITed bpf program or interpreter */
- if (emit_call(&prog, tp->progs[i]->bpf_func, prog))
- return -EINVAL;
-
- /* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) tp->progs[i] >> 32,
- (u32) (long) tp->progs[i]);
- /* arg2: mov rsi, rbx <- start time in nsec */
- emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
- if (emit_call(&prog, __bpf_prog_exit, prog))
+ if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size))
return -EINVAL;
}
*pprog = prog;
@@ -1531,42 +1591,6 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
return prog - (u8 *)image;
}
-static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
-{
- u8 *prog = *pprog;
- int cnt = 0;
- s64 offset;
-
- offset = func - (ip + 2 + 4);
- if (!is_simm32(offset)) {
- pr_err("Target %p is out of range\n", func);
- return -EINVAL;
- }
- EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
- *pprog = prog;
- return 0;
-}
-
-static void emit_nops(u8 **pprog, unsigned int len)
-{
- unsigned int i, noplen;
- u8 *prog = *pprog;
- int cnt = 0;
-
- while (len > 0) {
- noplen = len;
-
- if (noplen > ASM_NOP_MAX)
- noplen = ASM_NOP_MAX;
-
- for (i = 0; i < noplen; i++)
- EMIT1(ideal_nops[noplen][i]);
- len -= noplen;
- }
-
- *pprog = prog;
-}
-
static int emit_fallback_jump(u8 **pprog)
{
u8 *prog = *pprog;
@@ -1589,7 +1613,7 @@ static int emit_fallback_jump(u8 **pprog)
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
- u8 *jg_reloc, *jg_target, *prog = *pprog;
+ u8 *jg_reloc, *prog = *pprog;
int pivot, err, jg_bytes = 1, cnt = 0;
s64 jg_offset;
@@ -1644,9 +1668,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
* Coding Rule 11: All branch targets should be 16-byte
* aligned.
*/
- jg_target = PTR_ALIGN(prog, 16);
- if (jg_target != prog)
- emit_nops(&prog, jg_target - prog);
+ emit_align(&prog, 16);
jg_offset = prog - jg_reloc;
emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
--
2.20.1
From: KP Singh <[email protected]>
The current fexit and fentry tests rely on a different program to
exercise the functions they attach to. Instead of doing this, implement
the test operations for tracing which will also be used for
BPF_MODIFY_RETURN in a subsequent patch.
Also, clean up the fexit test to use the generated skeleton.
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
include/linux/bpf.h | 10 +++
kernel/trace/bpf_trace.c | 1 +
net/bpf/test_run.c | 37 +++++++---
.../selftests/bpf/prog_tests/fentry_fexit.c | 12 +---
.../selftests/bpf/prog_tests/fentry_test.c | 14 ++--
.../selftests/bpf/prog_tests/fexit_test.c | 69 ++++++-------------
6 files changed, 67 insertions(+), 76 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f748b31e5888..40c53924571d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1156,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
@@ -1313,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 07764c761073..363e0a2c75cf 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1266,6 +1266,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = {
};
const struct bpf_prog_ops tracing_prog_ops = {
+ .test_run = bpf_prog_test_run_tracing,
};
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1cd7a1c2f8b2..3600f098e7c6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -160,18 +160,37 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
kfree(data);
return ERR_PTR(-EFAULT);
}
- if (bpf_fentry_test1(1) != 2 ||
- bpf_fentry_test2(2, 3) != 5 ||
- bpf_fentry_test3(4, 5, 6) != 15 ||
- bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
- bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
- bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) {
- kfree(data);
- return ERR_PTR(-EFAULT);
- }
+
return data;
}
+int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ int err = -EFAULT;
+
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (bpf_fentry_test1(1) != 2 ||
+ bpf_fentry_test2(2, 3) != 5 ||
+ bpf_fentry_test3(4, 5, 6) != 15 ||
+ bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
+ bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
+ bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
+ goto out;
+ break;
+ default:
+ goto out;
+ }
+
+ err = 0;
+out:
+ trace_bpf_test_finish(&err);
+ return err;
+}
+
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 235ac4f67f5b..83493bd5745c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,22 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
struct fexit_test *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
__u32 duration = 0, retval;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
@@ -31,8 +26,8 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
@@ -49,7 +44,6 @@ void test_fentry_fexit(void)
}
close_prog:
- test_pkt_access__destroy(pkt_skel);
fentry_test__destroy(fentry_skel);
fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 5cc06021f27d..04ebbf1cb390 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,20 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
void test_fentry_test(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
__u64 *result;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto cleanup;
@@ -23,10 +18,10 @@ void test_fentry_test(void)
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto cleanup;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
@@ -39,5 +34,4 @@ void test_fentry_test(void)
cleanup:
fentry_test__destroy(fentry_skel);
- test_pkt_access__destroy(pkt_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d2c3655dd7a3..78d7a2765c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,64 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "fexit_test.skel.h"
void test_fexit_test(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./fexit_test.o",
- };
-
- char prog_name[] = "fexit/bpf_fentry_testX";
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, kfree_skb_fd, i;
- struct bpf_link *link[6] = {};
- struct bpf_program *prog[6];
+ struct fexit_test *fexit_skel = NULL;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 result[6];
+ __u64 *result;
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
- return;
- err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
- goto close_prog;
+ fexit_skel = fexit_test__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
- for (i = 0; i < 6; i++) {
- prog_name[sizeof(prog_name) - 2] = '1' + i;
- prog[i] = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
- goto close_prog;
+ err = fexit_test__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- for (i = 0; i < 6; i++)
- if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
- i + 1, result[i]))
- goto close_prog;
+ result = (__u64 *)fexit_skel->bss;
+ for (i = 0; i < 6; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_test%d failed err %lld\n", i + 1, result[i]))
+ goto cleanup;
+ }
-close_prog:
- for (i = 0; i < 6; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+cleanup:
+ fexit_test__destroy(fexit_skel);
}
--
2.20.1
From: KP Singh <[email protected]>
Test for two scenarios:
* When the fmod_ret program returns 0, the original function should
be called along with fentry and fexit programs.
* When the fmod_ret program returns a non-zero value, the original
function should not be called, no side effect should be observed and
fentry and fexit programs should be called.
The result from the kernel function call and whether a side-effect is
observed is returned via the retval attr of the BPF_PROG_TEST_RUN (bpf)
syscall.
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
net/bpf/test_run.c | 22 ++++++-
.../selftests/bpf/prog_tests/modify_return.c | 65 +++++++++++++++++++
.../selftests/bpf/progs/modify_return.c | 49 ++++++++++++++
3 files changed, 135 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/modify_return.c
create mode 100644 tools/testing/selftests/bpf/progs/modify_return.c
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 3600f098e7c6..4c921f5154e0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,7 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <linux/error-injection.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
return a + (long)b + c + d + (long)e + f;
}
+int noinline bpf_modify_return_test(int a, int *b)
+{
+ *b += 1;
+ return a + *b;
+}
+
+ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@@ -168,7 +177,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
- int err = -EFAULT;
+ u16 side_effect = 0, ret = 0;
+ int b = 2, err = -EFAULT;
+ u32 retval = 0;
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
@@ -181,10 +192,19 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
goto out;
break;
+ case BPF_MODIFY_RETURN:
+ ret = bpf_modify_return_test(1, &b);
+ if (b != 2)
+ side_effect = 1;
+ break;
default:
goto out;
}
+ retval = ((u32)side_effect << 16) | ret;
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+ goto out;
+
err = 0;
out:
trace_bpf_test_finish(&err);
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
new file mode 100644
index 000000000000..97fec70c600b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include "modify_return.skel.h"
+
+#define LOWER(x) ((x) & 0xffff)
+#define UPPER(x) ((x) >> 16)
+
+
+static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
+{
+ struct modify_return *skel = NULL;
+ int err, prog_fd;
+ __u32 duration = 0, retval;
+ __u16 side_effect;
+ __s16 ret;
+
+ skel = modify_return__open_and_load();
+ if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ goto cleanup;
+
+ err = modify_return__attach(skel);
+ if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->input_retval = input_retval;
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
+ &retval, &duration);
+
+ CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+
+ side_effect = UPPER(retval);
+ ret = LOWER(retval);
+
+ CHECK(ret != want_ret, "test_run",
+ "unexpected ret: %d, expected: %d\n", ret, want_ret);
+ CHECK(side_effect != want_side_effect, "modify_return",
+ "unexpected side_effect: %d\n", side_effect);
+
+ CHECK(skel->bss->fentry_result != 1, "modify_return",
+ "fentry failed\n");
+ CHECK(skel->bss->fexit_result != 1, "modify_return",
+ "fexit failed\n");
+ CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
+ "fmod_ret failed\n");
+
+cleanup:
+ modify_return__destroy(skel);
+}
+
+void test_modify_return(void)
+{
+ run_test(0 /* input_retval */,
+ 1 /* want_side_effect */,
+ 4 /* want_ret */);
+ run_test(-EINVAL /* input_retval */,
+ 0 /* want_side_effect */,
+ -EINVAL /* want_ret */);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
new file mode 100644
index 000000000000..8b7466a15c6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int sequence = 0;
+__s32 input_retval = 0;
+
+__u64 fentry_result = 0;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, __u64 b)
+{
+ sequence++;
+ fentry_result = (sequence == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+{
+ sequence++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result = (sequence == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, __u64 b, int ret)
+{
+ sequence++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result = (sequence == 3 && ret == input_retval);
+ else
+ fexit_result = (sequence == 3 && ret == 4);
+
+ return 0;
+}
--
2.20.1
From: KP Singh <[email protected]>
- Allow BPF_MODIFY_RETURN attachment only to functions that are:
* Whitelisted for error injection by checking
within_error_injection_list. Similar discussions happened for the
bpf_override_return helper.
* security hooks, this is expected to be cleaned up with the LSM
changes after the KRSI patches introduce the LSM_HOOK macro:
https://lore.kernel.org/bpf/[email protected]/
- The attachment is currently limited to functions that return an int.
This can be extended later other types (e.g. PTR).
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
kernel/bpf/btf.c | 28 ++++++++++++++++++++--------
kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 30841fb8b3c0..50080add2ab9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3710,14 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
nr_args--;
}
- if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
- prog->expected_attach_type == BPF_MODIFY_RETURN) &&
- arg == nr_args) {
- if (!t)
- /* Default prog with 5 args. 6th arg is retval. */
- return true;
- /* function return type */
- t = btf_type_by_id(btf, t->type);
+ if (arg == nr_args) {
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
+ if (!t)
+ return true;
+ t = btf_type_by_id(btf, t->type);
+ } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ /* For now the BPF_MODIFY_RETURN can only be attached to
+ * functions that return an int.
+ */
+ if (!t)
+ return false;
+
+ t = btf_type_skip_modifiers(btf, t->type, NULL);
+ if (!btf_type_is_int(t)) {
+ bpf_log(log,
+ "ret type %s not allowed for fmod_ret\n",
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ }
} else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
tname, arg + 1);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2460c8e6b5be..ae32517d4ccd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19,6 +19,7 @@
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
+#include <linux/error-injection.h>
#include "disasm.h"
@@ -9800,6 +9801,33 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return 0;
}
+#define SECURITY_PREFIX "security_"
+
+static int check_attach_modify_return(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
+
+ if (within_error_injection_list(addr))
+ return 0;
+
+ /* This is expected to be cleaned up in the future with the KRSI effort
+ * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
+ */
+ if (!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
+ sizeof(SECURITY_PREFIX) - 1)) {
+
+ if (!capable(CAP_MAC_ADMIN))
+ return -EPERM;
+
+ return 0;
+ }
+
+ verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
+ prog->aux->attach_btf_id, prog->aux->attach_func_name);
+
+ return -EINVAL;
+}
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
@@ -10000,6 +10028,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
tr->func.addr = (void *)addr;
prog->aux->trampoline = tr;
+
+ if (prog->expected_attach_type == BPF_MODIFY_RETURN)
+ ret = check_attach_modify_return(env);
out:
mutex_unlock(&tr->mutex);
if (ret)
--
2.20.1
From: KP Singh <[email protected]>
When multiple programs are attached, each program receives the return
value from the previous program on the stack and the last program
provides the return value to the attached function.
The fmod_ret bpf programs are run after the fentry programs and before
the fexit programs. The original function is only called if all the
fmod_ret programs return 0 to avoid any unintended side-effects. The
success value, i.e. 0 is not currently configurable but can be made so
where user-space can specify it at load time.
For example:
int func_to_be_attached(int a, int b)
{ <--- do_fentry
do_fmod_ret:
<update ret by calling fmod_ret>
if (ret != 0)
goto do_fexit;
original_function:
<side_effects_happen_here>
} <--- do_fexit
The fmod_ret program attached to this function can be defined as:
SEC("fmod_ret/func_to_be_attached")
int BPF_PROG(func_name, int a, int b, int ret)
{
// This will skip the original function logic.
return 1;
}
The first fmod_ret program is passed 0 in its return argument.
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
arch/x86/net/bpf_jit_comp.c | 130 ++++++++++++++++++++++++++++++---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/btf.c | 3 +-
kernel/bpf/syscall.c | 1 +
kernel/bpf/trampoline.c | 5 +-
kernel/bpf/verifier.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
8 files changed, 130 insertions(+), 13 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d6349e930b06..b1fd000feb89 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1362,7 +1362,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
- struct bpf_prog *p, int stack_size)
+ struct bpf_prog *p, int stack_size, bool mod_ret)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -1383,6 +1383,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (emit_call(&prog, p->bpf_func, prog))
return -EINVAL;
+ /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+ * of the previous call which is then passed on the stack to
+ * the next BPF program.
+ */
+ if (mod_ret)
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
(u32) (long) p);
@@ -1442,6 +1449,23 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
return 0;
}
+static int emit_mod_ret_check_imm8(u8 **pprog, int value)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (!is_imm8(value))
+ return -EINVAL;
+
+ if (value == 0)
+ EMIT2(0x85, add_2reg(0xC0, BPF_REG_0, BPF_REG_0));
+ else
+ EMIT3(0x83, add_1reg(0xF8, BPF_REG_0), value);
+
+ *pprog = prog;
+ return 0;
+}
+
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_progs *tp, int stack_size)
{
@@ -1449,9 +1473,49 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog;
for (i = 0; i < tp->nr_progs; i++) {
- if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size))
+ if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
+
+static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
+ struct bpf_tramp_progs *tp, int stack_size,
+ u8 **branches)
+{
+ u8 *prog = *pprog;
+ int i;
+
+ /* The first fmod_ret program will receive a garbage return value.
+ * Set this to 0 to avoid confusing the program.
+ */
+ emit_mov_imm32(&prog, false, BPF_REG_0, 0);
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ for (i = 0; i < tp->nr_progs; i++) {
+ if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
return -EINVAL;
+
+ /* Generate a branch:
+ *
+ * if (ret != 0)
+ * goto do_fexit;
+ *
+ * If needed this can be extended to any integer value which can
+ * be passed by user-space when the program is loaded.
+ */
+ if (emit_mod_ret_check_imm8(&prog, 0))
+ return -EINVAL;
+
+ /* Save the location of the branch and Generate 6 nops
+ * (4 bytes for an offset and 2 bytes for the jump) These nops
+ * are replaced with a conditional jump once do_fexit (i.e. the
+ * start of the fexit invocation) is finalized.
+ */
+ branches[i] = prog;
+ emit_nops(&prog, 4 + 2);
}
+
*pprog = prog;
return 0;
}
@@ -1521,10 +1585,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
struct bpf_tramp_progs *tprogs,
void *orig_call)
{
- int cnt = 0, nr_args = m->nr_args;
+ int ret, i, cnt = 0, nr_args = m->nr_args;
int stack_size = nr_args * 8;
struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
+ u8 **branches = NULL;
u8 *prog;
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
@@ -1557,24 +1623,60 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
if (invoke_bpf(m, &prog, fentry, stack_size))
return -EINVAL;
+ if (fmod_ret->nr_progs) {
+ branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
+ GFP_KERNEL);
+ if (!branches)
+ return -ENOMEM;
+
+ if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
+ branches)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ }
+
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- if (fentry->nr_progs)
+ if (fentry->nr_progs || fmod_ret->nr_progs)
restore_regs(m, &prog, nr_args, stack_size);
/* call original function */
- if (emit_call(&prog, orig_call, prog))
- return -EINVAL;
+ if (emit_call(&prog, orig_call, prog)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
}
+ if (fmod_ret->nr_progs) {
+ /* From Intel 64 and IA-32 Architectures Optimization
+ * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+ * Coding Rule 11: All branch targets should be 16-byte
+ * aligned.
+ */
+ emit_align(&prog, 16);
+ /* Update the branches saved in invoke_bpf_mod_ret with the
+ * aligned address of do_fexit.
+ */
+ for (i = 0; i < fmod_ret->nr_progs; i++)
+ emit_cond_near_jump(&branches[i], prog, branches[i],
+ X86_JNE);
+ }
+
if (fexit->nr_progs)
- if (invoke_bpf(m, &prog, fexit, stack_size))
- return -EINVAL;
+ if (invoke_bpf(m, &prog, fexit, stack_size)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_regs(m, &prog, nr_args, stack_size);
+ /* This needs to be done regardless. If there were fmod_ret programs,
+ * the return value is only updated on the stack and still needs to be
+ * restored to R0.
+ */
if (flags & BPF_TRAMP_F_CALL_ORIG)
/* restore original return value back into RAX */
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
@@ -1586,9 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
EMIT1(0xC3); /* ret */
/* Make sure the trampoline generation logic doesn't overflow */
- if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY))
- return -EFAULT;
- return prog - (u8 *)image;
+ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ ret = prog - (u8 *)image;
+
+cleanup:
+ kfree(branches);
+ return ret;
}
static int emit_fallback_jump(u8 **pprog)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98ec10b23dbb..f748b31e5888 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
enum bpf_tramp_prog_type {
BPF_TRAMP_FENTRY,
BPF_TRAMP_FEXIT,
+ BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d6b33ea27bcc..40b2d9476268 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
+ BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 787140095e58..30841fb8b3c0 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3710,7 +3710,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
nr_args--;
}
- if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_MODIFY_RETURN) &&
arg == nr_args) {
if (!t)
/* Default prog with 5 args. 6th arg is retval. */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 13de65363ba2..7ce0815793dd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2324,6 +2324,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_MODIFY_RETURN &&
prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL;
goto out_put_prog;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 546198f6f307..221a17af1f81 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -232,7 +232,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
}
- if (tprogs[BPF_TRAMP_FEXIT].nr_progs)
+ if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
+ tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
/* Though the second half of trampoline page is unused a task could be
@@ -269,6 +270,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
switch (t) {
case BPF_TRACE_FENTRY:
return BPF_TRAMP_FENTRY;
+ case BPF_MODIFY_RETURN:
+ return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
default:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 289383edfc8c..2460c8e6b5be 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9950,6 +9950,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (!prog_extension)
return -EINVAL;
/* fallthrough */
+ case BPF_MODIFY_RETURN:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d6b33ea27bcc..40b2d9476268 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -210,6 +210,7 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
+ BPF_MODIFY_RETURN,
__MAX_BPF_ATTACH_TYPE
};
--
2.20.1
From: KP Singh <[email protected]>
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
tools/lib/bpf/libbpf.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f8c4042e5855..223be01dc466 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6288,6 +6288,10 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FENTRY,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fmod_ret/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
SEC_DEF("fexit/", TRACING,
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
--
2.20.1
From: KP Singh <[email protected]>
As we need to introduce a third type of attachment for trampolines, the
flattened signature of arch_prepare_bpf_trampoline gets even more
complicated.
Refactor the prog and count argument to arch_prepare_bpf_trampoline to
use bpf_tramp_progs to simplify the addition and accounting for new
attachment types.
Signed-off-by: KP Singh <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
---
arch/x86/net/bpf_jit_comp.c | 31 ++++++++++---------
include/linux/bpf.h | 13 ++++++--
kernel/bpf/bpf_struct_ops.c | 10 +++++-
kernel/bpf/trampoline.c | 62 +++++++++++++++++++++----------------
4 files changed, 71 insertions(+), 45 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9ba08e9abc09..15c7d28bc05c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1362,12 +1362,12 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
- struct bpf_prog **progs, int prog_cnt, int stack_size)
+ struct bpf_tramp_progs *tp, int stack_size)
{
u8 *prog = *pprog;
int cnt = 0, i;
- for (i = 0; i < prog_cnt; i++) {
+ for (i = 0; i < tp->nr_progs; i++) {
if (emit_call(&prog, __bpf_prog_enter, prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
@@ -1376,17 +1376,17 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
/* arg2: progs[i]->insnsi for interpreter */
- if (!progs[i]->jited)
+ if (!tp->progs[i]->jited)
emit_mov_imm64(&prog, BPF_REG_2,
- (long) progs[i]->insnsi >> 32,
- (u32) (long) progs[i]->insnsi);
+ (long) tp->progs[i]->insnsi >> 32,
+ (u32) (long) tp->progs[i]->insnsi);
/* call JITed bpf program or interpreter */
- if (emit_call(&prog, progs[i]->bpf_func, prog))
+ if (emit_call(&prog, tp->progs[i]->bpf_func, prog))
return -EINVAL;
/* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
- (u32) (long) progs[i]);
+ emit_mov_imm64(&prog, BPF_REG_1, (long) tp->progs[i] >> 32,
+ (u32) (long) tp->progs[i]);
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
if (emit_call(&prog, __bpf_prog_exit, prog))
@@ -1458,12 +1458,13 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_prog **fentry_progs, int fentry_cnt,
- struct bpf_prog **fexit_progs, int fexit_cnt,
+ struct bpf_tramp_progs *tprogs,
void *orig_call)
{
int cnt = 0, nr_args = m->nr_args;
int stack_size = nr_args * 8;
+ struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
u8 *prog;
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
@@ -1492,12 +1493,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
save_regs(m, &prog, nr_args, stack_size);
- if (fentry_cnt)
- if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ if (fentry->nr_progs)
+ if (invoke_bpf(m, &prog, fentry, stack_size))
return -EINVAL;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- if (fentry_cnt)
+ if (fentry->nr_progs)
restore_regs(m, &prog, nr_args, stack_size);
/* call original function */
@@ -1507,8 +1508,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
}
- if (fexit_cnt)
- if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ if (fexit->nr_progs)
+ if (invoke_bpf(m, &prog, fexit, stack_size))
return -EINVAL;
if (flags & BPF_TRAMP_F_RESTORE_REGS)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f13c78c6f29d..98ec10b23dbb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -433,6 +433,16 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
+/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
+ */
+#define BPF_MAX_TRAMP_PROGS 40
+
+struct bpf_tramp_progs {
+ struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
+ int nr_progs;
+};
+
/* Different use cases for BPF trampoline:
* 1. replace nop at the function entry (kprobe equivalent)
* flags = BPF_TRAMP_F_RESTORE_REGS
@@ -455,8 +465,7 @@ struct btf_func_model {
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_prog **fentry_progs, int fentry_cnt,
- struct bpf_prog **fexit_progs, int fexit_cnt,
+ struct bpf_tramp_progs *tprogs,
void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index c498f0fffb40..ca5cc8cdb6eb 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_struct_ops_value *uvalue, *kvalue;
const struct btf_member *member;
const struct btf_type *t = st_ops->type;
+ struct bpf_tramp_progs *tprogs = NULL;
void *udata, *kdata;
int prog_fd, err = 0;
void *image;
@@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->state || refcount_read(&uvalue->refcnt))
return -EINVAL;
+ tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+ if (!tprogs)
+ return -ENOMEM;
+
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
@@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
+ tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
+ tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
err = arch_prepare_bpf_trampoline(image,
st_map->image + PAGE_SIZE,
&st_ops->func_models[i], 0,
- &prog, 1, NULL, 0, NULL);
+ tprogs, NULL);
if (err < 0)
goto reset_unlock;
@@ -469,6 +476,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
+ kfree(tprogs);
mutex_unlock(&st_map->lock);
return err;
}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 704fa787fec0..546198f6f307 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -190,40 +190,49 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
-/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
- */
-#define BPF_MAX_TRAMP_PROGS 40
+static struct bpf_tramp_progs *
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
+{
+ const struct bpf_prog_aux *aux;
+ struct bpf_tramp_progs *tprogs;
+ struct bpf_prog **progs;
+ int kind;
+
+ *total = 0;
+ tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+ if (!tprogs)
+ return ERR_PTR(-ENOMEM);
+
+ for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+ tprogs[kind].nr_progs = tr->progs_cnt[kind];
+ *total += tr->progs_cnt[kind];
+ progs = tprogs[kind].progs;
+
+ hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
+ *progs++ = aux->prog;
+ }
+ return tprogs;
+}
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
- struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
- int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
- int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
- struct bpf_prog **progs, **fentry, **fexit;
+ struct bpf_tramp_progs *tprogs;
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
- struct bpf_prog_aux *aux;
- int err;
+ int err, total;
- if (fentry_cnt + fexit_cnt == 0) {
+ tprogs = bpf_trampoline_get_progs(tr, &total);
+ if (IS_ERR(tprogs))
+ return PTR_ERR(tprogs);
+
+ if (total == 0) {
err = unregister_fentry(tr, old_image);
tr->selector = 0;
goto out;
}
- /* populate fentry progs */
- fentry = progs = progs_to_run;
- hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
- *progs++ = aux->prog;
-
- /* populate fexit progs */
- fexit = progs;
- hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
- *progs++ = aux->prog;
-
- if (fexit_cnt)
+ if (tprogs[BPF_TRAMP_FEXIT].nr_progs)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
/* Though the second half of trampoline page is unused a task could be
@@ -232,12 +241,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
* preempted task. Hence wait for tasks to voluntarily schedule or go
* to userspace.
*/
+
synchronize_rcu_tasks();
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
- &tr->func.model, flags,
- fentry, fentry_cnt,
- fexit, fexit_cnt,
+ &tr->func.model, flags, tprogs,
tr->func.addr);
if (err < 0)
goto out;
@@ -252,6 +260,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
tr->selector++;
out:
+ kfree(tprogs);
return err;
}
@@ -409,8 +418,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_prog **fentry_progs, int fentry_cnt,
- struct bpf_prog **fexit_progs, int fexit_cnt,
+ struct bpf_tramp_progs *tprogs,
void *orig_call)
{
return -ENOTSUPP;
--
2.20.1
On Wed, Mar 04, 2020 at 08:18:46PM +0100, KP Singh wrote:
>
> Here is an example of how a fmod_ret program behaves:
>
> int func_to_be_attached(int a, int b)
V> { <--- do_fentry
>
> do_fmod_ret:
> <update ret by calling fmod_ret>
> if (ret != 0)
> goto do_fexit;
>
> original_function:
>
> <side_effects_happen_here>
>
> } <--- do_fexit
>
> ALLOW_ERROR_INJECTION(func_to_be_attached, ERRNO)
>
> The fmod_ret program attached to this function can be defined as:
>
> SEC("fmod_ret/func_to_be_attached")
> int BPF_PROG(func_name, int a, int b, int ret)
> {
> // This will skip the original function logic.
> return -1;
> }
Applied to bpf-next. Thanks.
I think it sets up a great base to parallelize further work.
1. I'm rebasing my sleepable BPF patches on top.
It's necessary to read enviroment variables without the
'opportunistic copy before hand' hack I saw in your github tree
to do bpf_get_env_var() helper.
2. please continue on LSM_HOOK patches to go via security tree.
3. we need a volunteer to generalize bpf_sk_storage to task and inode structs.
This work will be super useful for all bpf tracing too.
Sleepable progs are useful for tracing as well.
On Wed, Mar 4, 2020 at 2:20 PM KP Singh <[email protected]> wrote:
>
> From: KP Singh <[email protected]>
>
> - Allow BPF_MODIFY_RETURN attachment only to functions that are:
>
> * Whitelisted for error injection by checking
> within_error_injection_list. Similar discussions happened for the
> bpf_override_return helper.
>
> * security hooks, this is expected to be cleaned up with the LSM
> changes after the KRSI patches introduce the LSM_HOOK macro:
>
> https://lore.kernel.org/bpf/[email protected]/
>
> - The attachment is currently limited to functions that return an int.
> This can be extended later other types (e.g. PTR).
>
> Signed-off-by: KP Singh <[email protected]>
> Acked-by: Andrii Nakryiko <[email protected]>
> ---
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2460c8e6b5be..ae32517d4ccd 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -9800,6 +9801,33 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>
> return 0;
> }
> +#define SECURITY_PREFIX "security_"
> +
> +static int check_attach_modify_return(struct bpf_verifier_env *env)
> +{
> + struct bpf_prog *prog = env->prog;
> + unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
> +
> + if (within_error_injection_list(addr))
> + return 0;
> +
> + /* This is expected to be cleaned up in the future with the KRSI effort
> + * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
> + */
> + if (!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
> + sizeof(SECURITY_PREFIX) - 1)) {
> +
> + if (!capable(CAP_MAC_ADMIN))
> + return -EPERM;
CAP_MAC_ADMIN was originally introduced for Smack and is not
all-powerful wrt SELinux, so this is not a sufficient check for
SELinux.
We would want an actual security hook called here so we can implement
a specific check over userspace
being able to attach BPF progs to LSM hooks. CAP_MAC_ADMIN has other
connotations to SELinux (presently the
ability to set/get file security labels that are not known to the
currently loaded policy).
On Wed, Mar 4, 2020 at 2:20 PM KP Singh <[email protected]> wrote:
>
> From: KP Singh <[email protected]>
>
> When multiple programs are attached, each program receives the return
> value from the previous program on the stack and the last program
> provides the return value to the attached function.
>
> The fmod_ret bpf programs are run after the fentry programs and before
> the fexit programs. The original function is only called if all the
> fmod_ret programs return 0 to avoid any unintended side-effects. The
> success value, i.e. 0 is not currently configurable but can be made so
> where user-space can specify it at load time.
>
> For example:
>
> int func_to_be_attached(int a, int b)
> { <--- do_fentry
>
> do_fmod_ret:
> <update ret by calling fmod_ret>
> if (ret != 0)
> goto do_fexit;
>
> original_function:
>
> <side_effects_happen_here>
>
> } <--- do_fexit
>
> The fmod_ret program attached to this function can be defined as:
>
> SEC("fmod_ret/func_to_be_attached")
> int BPF_PROG(func_name, int a, int b, int ret)
> {
> // This will skip the original function logic.
> return 1;
> }
>
> The first fmod_ret program is passed 0 in its return argument.
>
> Signed-off-by: KP Singh <[email protected]>
> Acked-by: Andrii Nakryiko <[email protected]>
IIUC you've switched from a model where the BPF program would be
invoked after the original function logic
and the BPF program is skipped if the original function logic returns
non-zero to a model where the BPF program is invoked first and
the original function logic is skipped if the BPF program returns
non-zero. I'm not keen on that for userspace-loaded code attached
to LSM hooks; it means that userspace BPF programs can run even if
SELinux would have denied access and SELinux hooks get
skipped entirely if the BPF program returns an error. I think Casey
may have wrongly pointed you in this direction on the grounds
it can already happen with the base DAC checking logic. But that's
kernel DAC checking logic, not userspace-loaded code.
And the existing checking on attachment is not sufficient for SELinux
since CAP_MAC_ADMIN is not all powerful to SELinux.
Be careful about designing your mechanisms around Smack because Smack
is not the only LSM.
On 05-Mar 08:51, Stephen Smalley wrote:
> On Wed, Mar 4, 2020 at 2:20 PM KP Singh <[email protected]> wrote:
> >
> > From: KP Singh <[email protected]>
> >
> > When multiple programs are attached, each program receives the return
> > value from the previous program on the stack and the last program
> > provides the return value to the attached function.
> >
> > The fmod_ret bpf programs are run after the fentry programs and before
> > the fexit programs. The original function is only called if all the
> > fmod_ret programs return 0 to avoid any unintended side-effects. The
> > success value, i.e. 0 is not currently configurable but can be made so
> > where user-space can specify it at load time.
> >
> > For example:
> >
> > int func_to_be_attached(int a, int b)
> > { <--- do_fentry
> >
> > do_fmod_ret:
> > <update ret by calling fmod_ret>
> > if (ret != 0)
> > goto do_fexit;
> >
> > original_function:
> >
> > <side_effects_happen_here>
> >
> > } <--- do_fexit
> >
> > The fmod_ret program attached to this function can be defined as:
> >
> > SEC("fmod_ret/func_to_be_attached")
> > int BPF_PROG(func_name, int a, int b, int ret)
> > {
> > // This will skip the original function logic.
> > return 1;
> > }
> >
> > The first fmod_ret program is passed 0 in its return argument.
> >
> > Signed-off-by: KP Singh <[email protected]>
> > Acked-by: Andrii Nakryiko <[email protected]>
>
> IIUC you've switched from a model where the BPF program would be
> invoked after the original function logic
> and the BPF program is skipped if the original function logic returns
> non-zero to a model where the BPF program is invoked first and
> the original function logic is skipped if the BPF program returns
> non-zero. I'm not keen on that for userspace-loaded code attached
We do want to continue the KRSI series and the effort to implement a
proper BPF LSM. In the meantime, the tracing + error injection
solution helps us to:
* Provide better debug capabilities.
* And parallelize the effort to come up with the right helpers
for our LSM work and work on sleepable BPF which is also essential
for some of the helpers.
As you noted, in the KRSI v4 series, we mentioned that we would like
to have the user-space loaded BPF programs be unable to override the
decision made by the in-kernel logic/LSMs, but this got shot down:
https://lore.kernel.org/bpf/[email protected]
I would like to continue this discussion when we post the v5 series
for KRSI as to what the correct precedence order should be for the
BPF_PROG_TYPE_LSM and would appreciate if you also bring it up there.
> to LSM hooks; it means that userspace BPF programs can run even if
> SELinux would have denied access and SELinux hooks get
> skipped entirely if the BPF program returns an error. I think Casey
> may have wrongly pointed you in this direction on the grounds
> it can already happen with the base DAC checking logic. But that's
What we can do for this tracing/modify_ret series, is to remove
the special casing for "security_" functions in the BPF code and add
ALLOW_ERROR_INJECTION calls to the security hooks. This way, if
someone needs to disable the BPF programs being able to modify
security hooks, they can disable error injection. If that's okay, we
can send a patch.
- KP
> kernel DAC checking logic, not userspace-loaded code.
> And the existing checking on attachment is not sufficient for SELinux
> since CAP_MAC_ADMIN is not all powerful to SELinux.
> Be careful about designing your mechanisms around Smack because Smack
> is not the only LSM.
On Thu, Mar 05, 2020 at 08:43:11AM -0500, Stephen Smalley wrote:
> On Wed, Mar 4, 2020 at 2:20 PM KP Singh <[email protected]> wrote:
> >
> > From: KP Singh <[email protected]>
> >
> > - Allow BPF_MODIFY_RETURN attachment only to functions that are:
> >
> > * Whitelisted for error injection by checking
> > within_error_injection_list. Similar discussions happened for the
> > bpf_override_return helper.
> >
> > * security hooks, this is expected to be cleaned up with the LSM
> > changes after the KRSI patches introduce the LSM_HOOK macro:
> >
> > https://lore.kernel.org/bpf/[email protected]/
> >
> > - The attachment is currently limited to functions that return an int.
> > This can be extended later other types (e.g. PTR).
> >
> > Signed-off-by: KP Singh <[email protected]>
> > Acked-by: Andrii Nakryiko <[email protected]>
> > ---
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 2460c8e6b5be..ae32517d4ccd 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -9800,6 +9801,33 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
> >
> > return 0;
> > }
> > +#define SECURITY_PREFIX "security_"
> > +
> > +static int check_attach_modify_return(struct bpf_verifier_env *env)
> > +{
> > + struct bpf_prog *prog = env->prog;
> > + unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
> > +
> > + if (within_error_injection_list(addr))
> > + return 0;
> > +
> > + /* This is expected to be cleaned up in the future with the KRSI effort
> > + * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
> > + */
> > + if (!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
> > + sizeof(SECURITY_PREFIX) - 1)) {
> > +
> > + if (!capable(CAP_MAC_ADMIN))
> > + return -EPERM;
>
> CAP_MAC_ADMIN was originally introduced for Smack and is not
> all-powerful wrt SELinux, so this is not a sufficient check for
> SELinux.
I think you're misunderstanding the intent here.
This facility is just a faster version of kprobe based fault injection.
It doesn't care about LSM. Security is not a focus here.
It can fault inject in a lot of places in the kernel: syscalls,
kmalloc, page_alloc, fs internals, etc
I think above capable() check created this confusion and
we should remove it.
On 3/5/2020 7:54 AM, KP Singh wrote:
> On 05-Mar 08:51, Stephen Smalley wrote:
>> On Wed, Mar 4, 2020 at 2:20 PM KP Singh <[email protected]> wrote:
>>> From: KP Singh <[email protected]>
>>>
>>> When multiple programs are attached, each program receives the return
>>> value from the previous program on the stack and the last program
>>> provides the return value to the attached function.
>>>
>>> The fmod_ret bpf programs are run after the fentry programs and before
>>> the fexit programs. The original function is only called if all the
>>> fmod_ret programs return 0 to avoid any unintended side-effects. The
>>> success value, i.e. 0 is not currently configurable but can be made so
>>> where user-space can specify it at load time.
>>>
>>> For example:
>>>
>>> int func_to_be_attached(int a, int b)
>>> { <--- do_fentry
>>>
>>> do_fmod_ret:
>>> <update ret by calling fmod_ret>
>>> if (ret != 0)
>>> goto do_fexit;
>>>
>>> original_function:
>>>
>>> <side_effects_happen_here>
>>>
>>> } <--- do_fexit
>>>
>>> The fmod_ret program attached to this function can be defined as:
>>>
>>> SEC("fmod_ret/func_to_be_attached")
>>> int BPF_PROG(func_name, int a, int b, int ret)
>>> {
>>> // This will skip the original function logic.
>>> return 1;
>>> }
>>>
>>> The first fmod_ret program is passed 0 in its return argument.
>>>
>>> Signed-off-by: KP Singh <[email protected]>
>>> Acked-by: Andrii Nakryiko <[email protected]>
>> IIUC you've switched from a model where the BPF program would be
>> invoked after the original function logic
>> and the BPF program is skipped if the original function logic returns
>> non-zero to a model where the BPF program is invoked first and
>> the original function logic is skipped if the BPF program returns
>> non-zero. I'm not keen on that for userspace-loaded code attached
> We do want to continue the KRSI series and the effort to implement a
> proper BPF LSM. In the meantime, the tracing + error injection
> solution helps us to:
>
> * Provide better debug capabilities.
> * And parallelize the effort to come up with the right helpers
> for our LSM work and work on sleepable BPF which is also essential
> for some of the helpers.
>
> As you noted, in the KRSI v4 series, we mentioned that we would like
> to have the user-space loaded BPF programs be unable to override the
> decision made by the in-kernel logic/LSMs, but this got shot down:
>
> https://lore.kernel.org/bpf/[email protected]
>
> I would like to continue this discussion when we post the v5 series
> for KRSI as to what the correct precedence order should be for the
> BPF_PROG_TYPE_LSM and would appreciate if you also bring it up there.
I believe that I have stated that order isn't my issue.
Go first, last or as specified in the lsm list, I really
don't care. We'll talk about what does matter in the KRSI
thread.
>> to LSM hooks; it means that userspace BPF programs can run even if
>> SELinux would have denied access and SELinux hooks get
>> skipped entirely if the BPF program returns an error.
Then I'm fine with using the LSM ordering mechanisms that Kees
thought through to run the BPF last. Although I think it's somewhat
concerning that SELinux cares what other security models might be
in place. If BPF programs can violate SELinux (or traditional DAC)
policies there are bigger issues than ordering.
>> I think Casey
>> may have wrongly pointed you in this direction on the grounds
>> it can already happen with the base DAC checking logic. But that's
> What we can do for this tracing/modify_ret series, is to remove
> the special casing for "security_" functions in the BPF code and add
> ALLOW_ERROR_INJECTION calls to the security hooks. This way, if
> someone needs to disable the BPF programs being able to modify
> security hooks, they can disable error injection. If that's okay, we
> can send a patch.
>
> - KP
>
>> kernel DAC checking logic, not userspace-loaded code.
>> And the existing checking on attachment is not sufficient for SELinux
>> since CAP_MAC_ADMIN is not all powerful to SELinux.
>> Be careful about designing your mechanisms around Smack because Smack
>> is not the only LSM.
:)
On 04-Mar 14:17, Alexei Starovoitov wrote:
> On Wed, Mar 04, 2020 at 08:18:46PM +0100, KP Singh wrote:
> >
> > Here is an example of how a fmod_ret program behaves:
> >
> > int func_to_be_attached(int a, int b)
> V> { <--- do_fentry
> >
> > do_fmod_ret:
> > <update ret by calling fmod_ret>
> > if (ret != 0)
> > goto do_fexit;
> >
> > original_function:
> >
> > <side_effects_happen_here>
> >
> > } <--- do_fexit
> >
> > ALLOW_ERROR_INJECTION(func_to_be_attached, ERRNO)
> >
> > The fmod_ret program attached to this function can be defined as:
> >
> > SEC("fmod_ret/func_to_be_attached")
> > int BPF_PROG(func_name, int a, int b, int ret)
> > {
> > // This will skip the original function logic.
> > return -1;
> > }
>
> Applied to bpf-next. Thanks.
Thanks.
>
> I think it sets up a great base to parallelize further work.
Totally Agreed!
>
> 1. I'm rebasing my sleepable BPF patches on top.
> It's necessary to read enviroment variables without the
> 'opportunistic copy before hand' hack I saw in your github tree
:) Sleepable BPF would indeed make it much simpler.
> to do bpf_get_env_var() helper.
>
> 2. please continue on LSM_HOOK patches to go via security tree.
>
> 3. we need a volunteer to generalize bpf_sk_storage to task and inode structs.
This is quite important, especially for some of the examples we had
brought up.
I can take a look at the generalization of bpf_sk_storage.
Thanks!
- KP
> This work will be super useful for all bpf tracing too.
> Sleepable progs are useful for tracing as well.
On Thu, Mar 5, 2020 at 12:35 PM Casey Schaufler <[email protected]> wrote:
> I believe that I have stated that order isn't my issue.
> Go first, last or as specified in the lsm list, I really
> don't care. We'll talk about what does matter in the KRSI
> thread.
Order matters when the security module logic (in this case, the BPF
program) is loaded from userspace and
the userspace process isn't already required to be fully privileged
with respect to the in-kernel security modules.
CAP_MAC_ADMIN was their (not unreasonable) attempt to check that
requirement; it just doesn't happen to convey
the same meaning for SELinux since SELinux predates the introduction
of CAP_MAC_ADMIN (in Linux at least) and
since SELinux was designed to confine even processes with capabilities.
> Then I'm fine with using the LSM ordering mechanisms that Kees
> thought through to run the BPF last. Although I think it's somewhat
> concerning that SELinux cares what other security models might be
> in place. If BPF programs can violate SELinux (or traditional DAC)
> policies there are bigger issues than ordering.
It is only safe for Smack because CAP_MAC_ADMIN already conveys all
privileges with respect to Smack.
Otherwise, the BPF program can access information about the object
attributes, e.g. inode attributes,
and leak that information to userspace even if SELinux would have
denied the process that loaded the BPF
program permissions to directly obtain that information. This is also
why Landlock has to be last in the LSM list.
On 3/5/2020 10:03 AM, Stephen Smalley wrote:
> On Thu, Mar 5, 2020 at 12:35 PM Casey Schaufler <[email protected]> wrote:
>> I believe that I have stated that order isn't my issue.
>> Go first, last or as specified in the lsm list, I really
>> don't care. We'll talk about what does matter in the KRSI
>> thread.
> Order matters when the security module logic (in this case, the BPF
> program) is loaded from userspace and
> the userspace process isn't already required to be fully privileged
> with respect to the in-kernel security modules.
> CAP_MAC_ADMIN was their (not unreasonable) attempt to check that
> requirement; it just doesn't happen to convey
> the same meaning for SELinux since SELinux predates the introduction
> of CAP_MAC_ADMIN (in Linux at least) and
> since SELinux was designed to confine even processes with capabilities.
If KRSI "needs" to go last, I'm fine with that. What I continue
to object to is making KRSI/BPF a special case in the code. It
doesn't need to be.
On Thu, Mar 5, 2020 at 10:54 AM KP Singh <[email protected]> wrote:
>
> On 05-Mar 08:51, Stephen Smalley wrote:
> > IIUC you've switched from a model where the BPF program would be
> > invoked after the original function logic
> > and the BPF program is skipped if the original function logic returns
> > non-zero to a model where the BPF program is invoked first and
> > the original function logic is skipped if the BPF program returns
> > non-zero. I'm not keen on that for userspace-loaded code attached
>
> We do want to continue the KRSI series and the effort to implement a
> proper BPF LSM. In the meantime, the tracing + error injection
> solution helps us to:
>
> * Provide better debug capabilities.
> * And parallelize the effort to come up with the right helpers
> for our LSM work and work on sleepable BPF which is also essential
> for some of the helpers.
>
> As you noted, in the KRSI v4 series, we mentioned that we would like
> to have the user-space loaded BPF programs be unable to override the
> decision made by the in-kernel logic/LSMs, but this got shot down:
>
> https://lore.kernel.org/bpf/[email protected]
>
> I would like to continue this discussion when we post the v5 series
> for KRSI as to what the correct precedence order should be for the
> BPF_PROG_TYPE_LSM and would appreciate if you also bring it up there.
That's fine but I guess I don't see why you or anyone else would
bother with introducing a BPF_PROG_TYPE_LSM
if BPF_PROG_MODIFY_RETURN is accepted and is allowed to attach to the
LSM hooks. What's the benefit to you
if you can achieve your goals directly with MODIFY_RETURN?
> > to LSM hooks; it means that userspace BPF programs can run even if
> > SELinux would have denied access and SELinux hooks get
> > skipped entirely if the BPF program returns an error. I think Casey
> > may have wrongly pointed you in this direction on the grounds
> > it can already happen with the base DAC checking logic. But that's
>
> What we can do for this tracing/modify_ret series, is to remove
> the special casing for "security_" functions in the BPF code and add
> ALLOW_ERROR_INJECTION calls to the security hooks. This way, if
> someone needs to disable the BPF programs being able to modify
> security hooks, they can disable error injection. If that's okay, we
> can send a patch.
Realistically distros tend to enable lots of developer-friendly
options including error injection, and most users don't build their
own kernels
and distros won't support them when they do. So telling users they can
just rebuild their kernel without error injection if they care about
BPF programs being able to modify security hooks isn't really viable.
The security modules need a way to veto it based on their policies.
That's why I suggested a security hook here.
On 05-M?r 14:43, Stephen Smalley wrote:
> On Thu, Mar 5, 2020 at 10:54 AM KP Singh <[email protected]> wrote:
> >
> > On 05-Mar 08:51, Stephen Smalley wrote:
> > > IIUC you've switched from a model where the BPF program would be
> > > invoked after the original function logic
> > > and the BPF program is skipped if the original function logic returns
> > > non-zero to a model where the BPF program is invoked first and
> > > the original function logic is skipped if the BPF program returns
> > > non-zero. I'm not keen on that for userspace-loaded code attached
> >
> > We do want to continue the KRSI series and the effort to implement a
> > proper BPF LSM. In the meantime, the tracing + error injection
> > solution helps us to:
> >
> > * Provide better debug capabilities.
> > * And parallelize the effort to come up with the right helpers
> > for our LSM work and work on sleepable BPF which is also essential
> > for some of the helpers.
> >
> > As you noted, in the KRSI v4 series, we mentioned that we would like
> > to have the user-space loaded BPF programs be unable to override the
> > decision made by the in-kernel logic/LSMs, but this got shot down:
> >
> > https://lore.kernel.org/bpf/[email protected]
> >
> > I would like to continue this discussion when we post the v5 series
> > for KRSI as to what the correct precedence order should be for the
> > BPF_PROG_TYPE_LSM and would appreciate if you also bring it up there.
>
> That's fine but I guess I don't see why you or anyone else would
> bother with introducing a BPF_PROG_TYPE_LSM
> if BPF_PROG_MODIFY_RETURN is accepted and is allowed to attach to the
> LSM hooks. What's the benefit to you
> if you can achieve your goals directly with MODIFY_RETURN?
There is still value in being a proper LSM, as I had mentioned in KRSI
v3 that not all security_* wrappers simply call the attached hooks and
return their exit code.
It's also okay, taking into consideration Casey's objections and Kees'
suggestion, to be properly registered with the LSM framework (even if
it is with LSM_ORDER_LAST) and work towards improving some of the
performance bottle-necks in the framework. It would be a positive
outcome for all LSMs.
BPF_MODIFY_RETURN is just a first step which lays the foundation for
BPF_PROG_TYPE_LSM and facilitates us to build BPF infrastructure for
it.
- KP
>
> > > to LSM hooks; it means that userspace BPF programs can run even if
> > > SELinux would have denied access and SELinux hooks get
> > > skipped entirely if the BPF program returns an error. I think Casey
> > > may have wrongly pointed you in this direction on the grounds
> > > it can already happen with the base DAC checking logic. But that's
> >
> > What we can do for this tracing/modify_ret series, is to remove
> > the special casing for "security_" functions in the BPF code and add
> > ALLOW_ERROR_INJECTION calls to the security hooks. This way, if
> > someone needs to disable the BPF programs being able to modify
> > security hooks, they can disable error injection. If that's okay, we
> > can send a patch.
>
> Realistically distros tend to enable lots of developer-friendly
> options including error injection, and most users don't build their
> own kernels
> and distros won't support them when they do. So telling users they can
> just rebuild their kernel without error injection if they care about
> BPF programs being able to modify security hooks isn't really viable.
> The security modules need a way to veto it based on their policies.
> That's why I suggested a security hook here.