From: Menglong Dong <[email protected]>
For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
on the kernel functions whose arguments count less than 6. This is not
friendly at all, as too many functions have arguments count more than 6.
According to the current kernel version, below is a statistics of the
function arguments count:
argument count | function count
7 | 704
8 | 270
9 | 84
10 | 47
11 | 47
12 | 27
13 | 22
14 | 5
15 | 0
16 | 1
Therefore, let's enhance it by increasing the function arguments count
allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
In the 1st patch, we make arch_prepare_bpf_trampoline() support to copy
function arguments in stack for x86 arch. Therefore, the maximum
arguments can be up to MAX_BPF_FUNC_ARGS for FENTRY and FEXIT.
In the 2nd patch, we clean garbage value in upper bytes of the trampoline
when we store the arguments from regs or on-stack into stack.
And the 3rd patches are for the testcases of the 1st patch.
Changes since v3:
- try make the stack pointer 16-byte aligned. Not sure if I'm right :)
- introduce clean_garbage() to clean the grabage when argument count is 7
- use different data type in bpf_testmod_fentry_test{7,12}
- add testcase for grabage values in ctx
Changes since v2:
- keep MAX_BPF_FUNC_ARGS still
- clean garbage value in upper bytes in the 2nd patch
- move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
bpf_testmod_fentry_test{7,12} meanwhile in the 3rd patch
Changes since v1:
- change the maximun function arguments to 14 from 12
- add testcases (Jiri Olsa)
- instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
Menglong Dong (3):
bpf, x86: allow function arguments up to 12 for TRACING
bpf, x86: clean garbage value in the stack of trampoline
selftests/bpf: add testcase for FENTRY/FEXIT with 6+ arguments
arch/x86/net/bpf_jit_comp.c | 145 ++++++++++++++++--
.../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++-
.../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
.../selftests/bpf/prog_tests/fentry_test.c | 2 +
.../selftests/bpf/prog_tests/fexit_test.c | 2 +
.../testing/selftests/bpf/progs/fentry_test.c | 33 ++++
.../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++
7 files changed, 245 insertions(+), 17 deletions(-)
--
2.40.1
From: Menglong Dong <[email protected]>
There are garbage values in upper bytes when we store the arguments
into stack in save_regs() if the size of the argument less then 8.
As we already reserve 8 byte for the arguments in regs and stack,
it is ok to store/restore the regs in BPF_DW size. Then, the garbage
values in upper bytes will be cleaned.
Signed-off-by: Menglong Dong <[email protected]>
---
v4:
- clean grabage value when argument count is 7
---
arch/x86/net/bpf_jit_comp.c | 45 ++++++++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 13 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a767e13c8c85..f6f51a5d14db 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1857,6 +1857,28 @@ st: if (is_imm8(insn->off))
return proglen;
}
+static inline void clean_garbage(u8 **pprog, int nr_regs, int stack_size,
+ int arg_size)
+{
+ u8 *prog;
+
+ /* clean potential garbage values in upper 32-bit. 'stack_size'
+ * here is the offset of the 7th argument on-stack.
+ */
+ if (nr_regs == 7 && arg_size <= 4) {
+ int off = -(stack_size - 4);
+
+ prog = *pprog;
+ /* mov DWORD PTR [rbp + off], 0 */
+ if (!is_imm8(off))
+ EMIT2_off32(0xC7, 0x85, off);
+ else
+ EMIT3(0xC7, 0x45, off);
+ EMIT(0, 4);
+ *pprog = prog;
+ }
+}
+
static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
int stack_size)
{
@@ -1878,8 +1900,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
if (i <= 5) {
/* copy function arguments from regs into stack */
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
+ emit_stx(prog, BPF_DW, BPF_REG_FP,
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
-(stack_size - i * 8));
} else {
@@ -1893,17 +1914,16 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
* 8(return addr of the caller)
* which means: rbp + 24
*/
- emit_ldx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_0, BPF_REG_FP,
+ emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
(i - 6) * 8 + 0x18);
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
- BPF_REG_0,
+ emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
-(stack_size - i * 8));
}
j = next_same_struct ? j : j + 1;
}
+
+ clean_garbage(prog, nr_regs, stack_size - 6 * 8, arg_size);
}
static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
@@ -1925,7 +1945,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
next_same_struct = !next_same_struct;
}
- emit_ldx(prog, bytes_to_bpf_size(arg_size),
+ emit_ldx(prog, BPF_DW,
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
BPF_REG_FP,
-(stack_size - i * 8));
@@ -1956,17 +1976,16 @@ static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
}
if (i > 5) {
- emit_ldx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_0, BPF_REG_FP,
+ emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
(i - 6) * 8 + 0x18);
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
- BPF_REG_0,
+ emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
-(stack_size - (i - 6) * 8));
}
j = next_same_struct ? j : j + 1;
}
+
+ clean_garbage(prog, nr_regs, stack_size, arg_size);
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
--
2.40.1
From: Menglong Dong <[email protected]>
Add test9/test10 in fexit_test.c and fentry_test.c to test the fentry
and fexit whose target function have 7/12 arguments.
Correspondingly, add bpf_testmod_fentry_test7() and
bpf_testmod_fentry_test12() to bpf_testmod.c
And the testcases passed:
./test_progs -t fexit
Summary: 5/12 PASSED, 0 SKIPPED, 0 FAILED
./test_progs -t fentry
Summary: 3/0 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Menglong Dong <[email protected]>
---
v4:
- use different type for args in bpf_testmod_fentry_test{7,12}
- add testcase for grabage values in ctx
v3:
- move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
bpf_testmod_fentry_test{7,12} meanwhile
- get return value by bpf_get_func_ret() in
"fexit/bpf_testmod_fentry_test12", as we don't change ___bpf_ctx_cast()
in this version
---
.../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++++++-
.../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
.../selftests/bpf/prog_tests/fentry_test.c | 2 +
.../selftests/bpf/prog_tests/fexit_test.c | 2 +
.../testing/selftests/bpf/progs/fentry_test.c | 33 +++++++++++
.../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++++++++++++++
6 files changed, 115 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index cf216041876c..66615fdbe3df 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -191,6 +191,19 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
return a + b + c;
}
+noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
+ void *e, u64 f, u64 g)
+{
+ return a + (long)b + c + d + (long)e + f + g;
+}
+
+noinline int bpf_testmod_fentry_test12(u64 a, void *b, short c, int d,
+ void *e, u64 f, u64 g, u64 h,
+ u64 i, u64 j, u64 k, u64 l)
+{
+ return a + (long)b + c + d + (long)e + f + g + h + i + j + k + l;
+}
+
__diag_pop();
int bpf_testmod_fentry_ok;
@@ -245,7 +258,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_fentry_test1(1) != 2 ||
bpf_testmod_fentry_test2(2, 3) != 5 ||
- bpf_testmod_fentry_test3(4, 5, 6) != 15)
+ bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
+ bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
+ 21, 22) != 133 ||
+ bpf_testmod_fentry_test12(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, 23, 24, 25, 26, 27) != 258)
goto out;
bpf_testmod_fentry_ok = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..0078acee0ede 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -31,10 +31,12 @@ void test_fentry_fexit(void)
ASSERT_OK(err, "ipv6 test_run");
ASSERT_OK(topts.retval, "ipv6 test retval");
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < 11; i++) {
ASSERT_EQ(fentry_res[i], 1, "fentry result");
ASSERT_EQ(fexit_res[i], 1, "fexit result");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index c0d1d61d5f66..e1c0ce40febf 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -24,6 +24,8 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel)
ASSERT_OK(err, "test_run");
ASSERT_EQ(topts.retval, 0, "test_run");
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
result = (__u64 *)fentry_skel->bss;
for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
if (!ASSERT_EQ(result[i], 1, "fentry_result"))
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 101b7343036b..ea81fa913ec6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -24,6 +24,8 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel)
ASSERT_OK(err, "test_run");
ASSERT_EQ(topts.retval, 0, "test_run");
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
result = (__u64 *)fexit_skel->bss;
for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
if (!ASSERT_EQ(result[i], 1, "fexit_result"))
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 52a550d281d9..91dbf63b3ba1 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -77,3 +77,36 @@ int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
test8_result = 1;
return 0;
}
+
+__u64 test9_result = 0;
+SEC("fentry/bpf_testmod_fentry_test7")
+int BPF_PROG(test9, __u64 a, void *b, short c, int d, void *e, char f,
+ int g)
+{
+ test9_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22;
+ return 0;
+}
+
+__u64 test10_result = 0;
+SEC("fentry/bpf_testmod_fentry_test12")
+int BPF_PROG(test10, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k,
+ unsigned char l)
+{
+ test10_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && l == 27;
+ return 0;
+}
+
+__u64 test11_result = 0;
+SEC("fentry/bpf_testmod_fentry_test12")
+int BPF_PROG(test11, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 l)
+{
+ test11_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && l == 27;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 8f1ccb7302e1..a6d8e03ff5b7 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -78,3 +78,60 @@ int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
test8_result = 1;
return 0;
}
+
+__u64 test9_result = 0;
+SEC("fexit/bpf_testmod_fentry_test7")
+int BPF_PROG(test9, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ test9_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && ret == 133;
+ return 0;
+}
+
+__u64 test10_result = 0;
+SEC("fexit/bpf_testmod_fentry_test12")
+int BPF_PROG(test10, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k,
+ unsigned char l)
+{
+ __u64 ret;
+ int err;
+
+ /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
+ * accessed yet. So we get the return value by bpf_get_func_ret()
+ * for now.
+ */
+ err = bpf_get_func_ret(ctx, &ret);
+ if (err)
+ return 0;
+
+ test10_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && l == 27 &&
+ (int)ret == 258;
+ return 0;
+}
+
+__u64 test11_result = 0;
+SEC("fexit/bpf_testmod_fentry_test12")
+int BPF_PROG(test11, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 l)
+{
+ __u64 ret;
+ int err;
+
+ /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
+ * accessed yet. So we get the return value by bpf_get_func_ret()
+ * for now.
+ */
+ err = bpf_get_func_ret(ctx, &ret);
+ if (err)
+ return 0;
+
+ test11_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && l == 27 &&
+ ret == 258;
+ return 0;
+}
--
2.40.1
From: Menglong Dong <[email protected]>
For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
on the kernel functions whose arguments count less than 6. This is not
friendly at all, as too many functions have arguments count more than 6.
According to the current kernel version, below is a statistics of the
function arguments count:
argument count | function count
7 | 704
8 | 270
9 | 84
10 | 47
11 | 47
12 | 27
13 | 22
14 | 5
15 | 0
16 | 1
Therefore, let's enhance it by increasing the function arguments count
allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
For the case that we don't need to call origin function, which means
without BPF_TRAMP_F_CALL_ORIG, we need only copy the function arguments
that stored in the frame of the caller to current frame. The arguments
of arg6-argN are stored in "$rbp + 0x18", we need copy them to
"$rbp - regs_off + (6 * 8)".
For the case with BPF_TRAMP_F_CALL_ORIG, we need prepare the arguments
in stack before call origin function, which means we need alloc extra
"8 * (arg_count - 6)" memory in the top of the stack. Note, there should
not be any data be pushed to the stack before call the origin function.
Then, we have to store rbx with 'mov' instead of 'push'.
We use EMIT3_off32() or EMIT4() for "lea" and "sub". The range of the
imm in "lea" and "sub" is [-128, 127] if EMIT4() is used. Therefore,
we use EMIT3_off32() instead if the imm out of the range.
It works well for the FENTRY/FEXIT/MODIFY_RETURN, I'm not sure if there
are other complicated cases.
Signed-off-by: Menglong Dong <[email protected]>
---
v4:
- make the stack 16-byte aligned if passing args on-stack is needed
- add the function arguments statistics to the commit log
v3:
- use EMIT3_off32() for "lea" and "sub" only on necessary
- make 12 as the maximum arguments count
v2:
- instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
- make MAX_BPF_FUNC_ARGS as the maximum argument count
---
arch/x86/net/bpf_jit_comp.c | 125 ++++++++++++++++++++++++++++++++----
1 file changed, 111 insertions(+), 14 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 1056bbf55b17..a767e13c8c85 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1868,7 +1868,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
* mov QWORD PTR [rbp-0x10],rdi
* mov QWORD PTR [rbp-0x8],rsi
*/
- for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
+ for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
/* The arg_size is at most 16 bytes, enforced by the verifier. */
arg_size = m->arg_size[j];
if (arg_size > 8) {
@@ -1876,10 +1876,31 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
next_same_struct = !next_same_struct;
}
- emit_stx(prog, bytes_to_bpf_size(arg_size),
- BPF_REG_FP,
- i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
- -(stack_size - i * 8));
+ if (i <= 5) {
+ /* copy function arguments from regs into stack */
+ emit_stx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+ } else {
+ /* copy function arguments from origin stack frame
+ * into current stack frame.
+ *
+ * The starting address of the arguments on-stack
+ * is:
+ * rbp + 8(push rbp) +
+ * 8(return addr of origin call) +
+ * 8(return addr of the caller)
+ * which means: rbp + 24
+ */
+ emit_ldx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_0, BPF_REG_FP,
+ (i - 6) * 8 + 0x18);
+ emit_stx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_FP,
+ BPF_REG_0,
+ -(stack_size - i * 8));
+ }
j = next_same_struct ? j : j + 1;
}
@@ -1913,6 +1934,41 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
}
}
+static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
+ int nr_regs, int stack_size)
+{
+ int i, j, arg_size;
+ bool next_same_struct = false;
+
+ if (nr_regs <= 6)
+ return;
+
+ /* Prepare the function arguments in stack before call origin
+ * function. These arguments must be stored in the top of the
+ * stack.
+ */
+ for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
+ /* The arg_size is at most 16 bytes, enforced by the verifier. */
+ arg_size = m->arg_size[j];
+ if (arg_size > 8) {
+ arg_size = 8;
+ next_same_struct = !next_same_struct;
+ }
+
+ if (i > 5) {
+ emit_ldx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_0, BPF_REG_FP,
+ (i - 6) * 8 + 0x18);
+ emit_stx(prog, bytes_to_bpf_size(arg_size),
+ BPF_REG_FP,
+ BPF_REG_0,
+ -(stack_size - (i - 6) * 8));
+ }
+
+ j = next_same_struct ? j : j + 1;
+ }
+}
+
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
int run_ctx_off, bool save_ret)
@@ -1938,7 +1994,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
- EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+ if (!is_imm8(-run_ctx_off))
+ EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
+ else
+ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
return -EINVAL;
@@ -1954,7 +2013,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_nops(&prog, 2);
/* arg1: lea rdi, [rbp - stack_size] */
- EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ if (!is_imm8(-stack_size))
+ EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
+ else
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
/* arg2: progs[i]->insnsi for interpreter */
if (!p->jited)
emit_mov_imm64(&prog, BPF_REG_2,
@@ -1984,7 +2046,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
/* arg3: lea rdx, [rbp - run_ctx_off] */
- EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
+ if (!is_imm8(-run_ctx_off))
+ EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
+ else
+ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
return -EINVAL;
@@ -2136,7 +2201,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off;
+ int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2150,8 +2215,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
nr_regs += (m->arg_size[i] + 7) / 8 - 1;
- /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
- if (nr_regs > 6)
+ /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
+ * are passed through regs, the remains are through stack.
+ */
+ if (nr_regs > MAX_BPF_FUNC_ARGS)
return -ENOTSUPP;
/* Generated trampoline stack layout:
@@ -2170,7 +2237,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
+ * RBP - rbx_off [ rbx value ] always
+ *
* RBP - run_ctx_off [ bpf_tramp_run_ctx ]
+ *
+ * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
+ * [ ... ]
+ * [ stack_arg2 ]
+ * RBP - arg_stack_off [ stack_arg1 ]
*/
/* room for return value of orig_call or fentry prog */
@@ -2190,9 +2264,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
ip_off = stack_size;
+ stack_size += 8;
+ rbx_off = stack_size;
+
stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
run_ctx_off = stack_size;
+ if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
+ stack_size += (nr_regs - 6) * 8;
+ /* make sure the stack pointer is 16-byte aligned if we
+ * need pass arguments on stack, which means
+ * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
+ * should be 16-byte aligned. Following code depend on
+ * that stack_size is already 8-byte aligned.
+ */
+ stack_size += (stack_size % 16) ? 0 : 8;
+ }
+
+ arg_stack_off = stack_size;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -2212,8 +2302,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
x86_call_depth_emit_accounting(&prog, NULL);
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
- EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
- EMIT1(0x53); /* push rbx */
+ if (!is_imm8(stack_size))
+ /* sub rsp, stack_size */
+ EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
+ else
+ /* sub rsp, stack_size */
+ EMIT4(0x48, 0x83, 0xEC, stack_size);
+ /* mov QWORD PTR [rbp - rbx_off], rbx */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
/* Store number of argument registers of the traced function:
* mov rax, nr_regs
@@ -2262,6 +2358,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
restore_regs(m, &prog, nr_regs, regs_off);
+ prepare_origin_stack(m, &prog, nr_regs, arg_stack_off);
if (flags & BPF_TRAMP_F_ORIG_STACK) {
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
@@ -2321,7 +2418,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (save_ret)
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
- EMIT1(0x5B); /* pop rbx */
+ emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
--
2.40.1
On 6/9/23 2:56 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
> on the kernel functions whose arguments count less than 6. This is not
> friendly at all, as too many functions have arguments count more than 6.
>
> According to the current kernel version, below is a statistics of the
> function arguments count:
>
> argument count | function count
> 7 | 704
> 8 | 270
> 9 | 84
> 10 | 47
> 11 | 47
> 12 | 27
> 13 | 22
> 14 | 5
> 15 | 0
> 16 | 1
>
> Therefore, let's enhance it by increasing the function arguments count
> allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
>
> For the case that we don't need to call origin function, which means
> without BPF_TRAMP_F_CALL_ORIG, we need only copy the function arguments
> that stored in the frame of the caller to current frame. The arguments
> of arg6-argN are stored in "$rbp + 0x18", we need copy them to
> "$rbp - regs_off + (6 * 8)".
>
> For the case with BPF_TRAMP_F_CALL_ORIG, we need prepare the arguments
> in stack before call origin function, which means we need alloc extra
> "8 * (arg_count - 6)" memory in the top of the stack. Note, there should
> not be any data be pushed to the stack before call the origin function.
> Then, we have to store rbx with 'mov' instead of 'push'.
>
> We use EMIT3_off32() or EMIT4() for "lea" and "sub". The range of the
> imm in "lea" and "sub" is [-128, 127] if EMIT4() is used. Therefore,
> we use EMIT3_off32() instead if the imm out of the range.
>
> It works well for the FENTRY/FEXIT/MODIFY_RETURN, I'm not sure if there
> are other complicated cases.
Just remove 'I'm not sure if there are other complicated cases'.
Since MODIFY_RETURN is mentioned. It would be great if you can add
a test for MODIFY_RETURN.
>
> Signed-off-by: Menglong Dong <[email protected]>
> ---
> v4:
> - make the stack 16-byte aligned if passing args on-stack is needed
> - add the function arguments statistics to the commit log
> v3:
> - use EMIT3_off32() for "lea" and "sub" only on necessary
> - make 12 as the maximum arguments count
> v2:
> - instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
> - make MAX_BPF_FUNC_ARGS as the maximum argument count
> ---
> arch/x86/net/bpf_jit_comp.c | 125 ++++++++++++++++++++++++++++++++----
> 1 file changed, 111 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 1056bbf55b17..a767e13c8c85 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1868,7 +1868,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> * mov QWORD PTR [rbp-0x10],rdi
> * mov QWORD PTR [rbp-0x8],rsi
> */
> - for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
> + for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
> /* The arg_size is at most 16 bytes, enforced by the verifier. */
> arg_size = m->arg_size[j];
> if (arg_size > 8) {
> @@ -1876,10 +1876,31 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> next_same_struct = !next_same_struct;
> }
>
> - emit_stx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_FP,
> - i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> - -(stack_size - i * 8));
> + if (i <= 5) {
> + /* copy function arguments from regs into stack */
> + emit_stx(prog, bytes_to_bpf_size(arg_size),
> + BPF_REG_FP,
> + i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> + -(stack_size - i * 8));
> + } else {
> + /* copy function arguments from origin stack frame
> + * into current stack frame.
> + *
> + * The starting address of the arguments on-stack
> + * is:
> + * rbp + 8(push rbp) +
> + * 8(return addr of origin call) +
> + * 8(return addr of the caller)
> + * which means: rbp + 24
> + */
> + emit_ldx(prog, bytes_to_bpf_size(arg_size),
> + BPF_REG_0, BPF_REG_FP,
> + (i - 6) * 8 + 0x18);
> + emit_stx(prog, bytes_to_bpf_size(arg_size),
> + BPF_REG_FP,
> + BPF_REG_0,
> + -(stack_size - i * 8));
> + }
I think we have a corner case which does not work for the above.
$ cat t.c
struct t {
long a, b;
};
void foo2(int a, int b, int c, int d, int e, struct t);
void bar(struct t arg) {
foo2(1, 2, 3, 4, 5, arg);
}
$ cat run.sh
clang -O2 -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -c t.c
$ ./run.sh
$ llvm-objdump -d t.o
t.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <bar>:
0: 48 83 ec 18 subq $0x18, %rsp
4: 48 89 f0 movq %rsi, %rax
7: 49 89 f9 movq %rdi, %r9
a: 48 89 7c 24 08 movq %rdi, 0x8(%rsp)
f: 48 89 74 24 10 movq %rsi, 0x10(%rsp)
14: bf 01 00 00 00 movl $0x1, %edi
19: be 02 00 00 00 movl $0x2, %esi
1e: ba 03 00 00 00 movl $0x3, %edx
23: b9 04 00 00 00 movl $0x4, %ecx
28: 41 b8 05 00 00 00 movl $0x5, %r8d
2e: 50 pushq %rax
2f: 41 51 pushq %r9
31: e8 00 00 00 00 callq 0x36 <bar+0x36>
36: 48 83 c4 28 addq $0x28, %rsp
3a: c3 retq
$
In this particular case, there is a struct argument (16-bytes).
Only 5 registers are used to pass arguments instead of normal 6.
The struct parameter is put on the stack. Basically struct
members should be all in register or all on the stack.
Not sure whether the kernel code contains similar instances
or not (not fully using 6 registers while some parameters on stack).
If not, I guess we do not need to support the above pattern.
>
> j = next_same_struct ? j : j + 1;
> }
> @@ -1913,6 +1934,41 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> }
> }
>
> +static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
> + int nr_regs, int stack_size)
> +{
> + int i, j, arg_size;
> + bool next_same_struct = false;
> +
> + if (nr_regs <= 6)
> + return;
> +
> + /* Prepare the function arguments in stack before call origin
> + * function. These arguments must be stored in the top of the
> + * stack.
> + */
> + for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
> + /* The arg_size is at most 16 bytes, enforced by the verifier. */
> + arg_size = m->arg_size[j];
> + if (arg_size > 8) {
> + arg_size = 8;
> + next_same_struct = !next_same_struct;
> + }
> +
> + if (i > 5) {
> + emit_ldx(prog, bytes_to_bpf_size(arg_size),
> + BPF_REG_0, BPF_REG_FP,
> + (i - 6) * 8 + 0x18);
> + emit_stx(prog, bytes_to_bpf_size(arg_size),
> + BPF_REG_FP,
> + BPF_REG_0,
> + -(stack_size - (i - 6) * 8));
> + }
> +
> + j = next_same_struct ? j : j + 1;
> + }
> +}
> +
> static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> struct bpf_tramp_link *l, int stack_size,
> int run_ctx_off, bool save_ret)
> @@ -1938,7 +1994,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> /* arg1: mov rdi, progs[i] */
> emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> /* arg2: lea rsi, [rbp - ctx_cookie_off] */
> - EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
> + if (!is_imm8(-run_ctx_off))
> + EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
> + else
> + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
>
> if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
> return -EINVAL;
> @@ -1954,7 +2013,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> emit_nops(&prog, 2);
>
> /* arg1: lea rdi, [rbp - stack_size] */
> - EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> + if (!is_imm8(-stack_size))
> + EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
> + else
> + EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> /* arg2: progs[i]->insnsi for interpreter */
> if (!p->jited)
> emit_mov_imm64(&prog, BPF_REG_2,
> @@ -1984,7 +2046,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> /* arg2: mov rsi, rbx <- start time in nsec */
> emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> /* arg3: lea rdx, [rbp - run_ctx_off] */
> - EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
> + if (!is_imm8(-run_ctx_off))
> + EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
> + else
> + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
> if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
> return -EINVAL;
>
> @@ -2136,7 +2201,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> void *func_addr)
> {
> int i, ret, nr_regs = m->nr_args, stack_size = 0;
> - int regs_off, nregs_off, ip_off, run_ctx_off;
> + int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> @@ -2150,8 +2215,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
> nr_regs += (m->arg_size[i] + 7) / 8 - 1;
>
> - /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
> - if (nr_regs > 6)
> + /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
> + * are passed through regs, the remains are through stack.
> + */
> + if (nr_regs > MAX_BPF_FUNC_ARGS)
> return -ENOTSUPP;
>
> /* Generated trampoline stack layout:
> @@ -2170,7 +2237,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> *
> * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
> *
> + * RBP - rbx_off [ rbx value ] always
> + *
> * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
> + *
> + * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
> + * [ ... ]
> + * [ stack_arg2 ]
> + * RBP - arg_stack_off [ stack_arg1 ]
> */
>
> /* room for return value of orig_call or fentry prog */
> @@ -2190,9 +2264,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>
> ip_off = stack_size;
>
> + stack_size += 8;
> + rbx_off = stack_size;
> +
> stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
> run_ctx_off = stack_size;
>
> + if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
> + stack_size += (nr_regs - 6) * 8;
> + /* make sure the stack pointer is 16-byte aligned if we
> + * need pass arguments on stack, which means
> + * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
> + * should be 16-byte aligned. Following code depend on
> + * that stack_size is already 8-byte aligned.
> + */
> + stack_size += (stack_size % 16) ? 0 : 8;
I think this is correct.
> + }
> +
> + arg_stack_off = stack_size;
> +
[...]
On 6/9/23 2:56 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> There are garbage values in upper bytes when we store the arguments
> into stack in save_regs() if the size of the argument less then 8.
>
> As we already reserve 8 byte for the arguments in regs and stack,
> it is ok to store/restore the regs in BPF_DW size. Then, the garbage
> values in upper bytes will be cleaned.
>
> Signed-off-by: Menglong Dong <[email protected]>
> ---
> v4:
> - clean grabage value when argument count is 7
> ---
> arch/x86/net/bpf_jit_comp.c | 45 ++++++++++++++++++++++++++-----------
> 1 file changed, 32 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index a767e13c8c85..f6f51a5d14db 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1857,6 +1857,28 @@ st: if (is_imm8(insn->off))
> return proglen;
> }
>
> +static inline void clean_garbage(u8 **pprog, int nr_regs, int stack_size,
> + int arg_size)
> +{
> + u8 *prog;
> +
> + /* clean potential garbage values in upper 32-bit. 'stack_size'
> + * here is the offset of the 7th argument on-stack.
Here, we have a huge assumption that if only 7 registers, compiler might
just allocate a 8-byte stack space and write the value to it. If the
type of the value is <= 32bit, a 32bit store will be on the stack.
So in this case, the upper 32bit needs to be cleared.
If the number of arguments (in terms of number of registers) is more
than 7, the extra arguments will be 'pushed' to the stack, so there
is no garbage. This could be true. But please add enough details
here so people knows why we special case this particular instance.
> + */
> + if (nr_regs == 7 && arg_size <= 4) {
> + int off = -(stack_size - 4);
> +
> + prog = *pprog;
> + /* mov DWORD PTR [rbp + off], 0 */
> + if (!is_imm8(off))
> + EMIT2_off32(0xC7, 0x85, off);
> + else
> + EMIT3(0xC7, 0x45, off);
> + EMIT(0, 4);
> + *pprog = prog;
> + }
> +}
> +
> static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> int stack_size)
> {
> @@ -1878,8 +1900,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
>
> if (i <= 5) {
> /* copy function arguments from regs into stack */
> - emit_stx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_FP,
> + emit_stx(prog, BPF_DW, BPF_REG_FP,
> i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> -(stack_size - i * 8));
> } else {
> @@ -1893,17 +1914,16 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> * 8(return addr of the caller)
> * which means: rbp + 24
> */
> - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_0, BPF_REG_FP,
> + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
> (i - 6) * 8 + 0x18);
> - emit_stx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_FP,
> - BPF_REG_0,
> + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
> -(stack_size - i * 8));
> }
>
> j = next_same_struct ? j : j + 1;
> }
> +
> + clean_garbage(prog, nr_regs, stack_size - 6 * 8, arg_size);
> }
>
> static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> @@ -1925,7 +1945,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> next_same_struct = !next_same_struct;
> }
>
> - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> + emit_ldx(prog, BPF_DW,
> i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> BPF_REG_FP,
> -(stack_size - i * 8));
> @@ -1956,17 +1976,16 @@ static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
> }
>
> if (i > 5) {
> - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_0, BPF_REG_FP,
> + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
> (i - 6) * 8 + 0x18);
> - emit_stx(prog, bytes_to_bpf_size(arg_size),
> - BPF_REG_FP,
> - BPF_REG_0,
> + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
> -(stack_size - (i - 6) * 8));
> }
>
> j = next_same_struct ? j : j + 1;
> }
> +
> + clean_garbage(prog, nr_regs, stack_size, arg_size);
> }
>
> static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
On 6/9/23 2:56 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
> on the kernel functions whose arguments count less than 6. This is not
> friendly at all, as too many functions have arguments count more than 6.
> According to the current kernel version, below is a statistics of the
> function arguments count:
>
> argument count | function count
> 7 | 704
> 8 | 270
> 9 | 84
> 10 | 47
> 11 | 47
> 12 | 27
> 13 | 22
> 14 | 5
> 15 | 0
> 16 | 1
>
> Therefore, let's enhance it by increasing the function arguments count
> allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
>
> In the 1st patch, we make arch_prepare_bpf_trampoline() support to copy
> function arguments in stack for x86 arch. Therefore, the maximum
> arguments can be up to MAX_BPF_FUNC_ARGS for FENTRY and FEXIT.
>
> In the 2nd patch, we clean garbage value in upper bytes of the trampoline
> when we store the arguments from regs or on-stack into stack.
>
> And the 3rd patches are for the testcases of the 1st patch.
>
> Changes since v3:
> - try make the stack pointer 16-byte aligned. Not sure if I'm right :)
> - introduce clean_garbage() to clean the grabage when argument count is 7
> - use different data type in bpf_testmod_fentry_test{7,12}
> - add testcase for grabage values in ctx
>
> Changes since v2:
> - keep MAX_BPF_FUNC_ARGS still
> - clean garbage value in upper bytes in the 2nd patch
> - move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
> bpf_testmod_fentry_test{7,12} meanwhile in the 3rd patch
>
> Changes since v1:
> - change the maximun function arguments to 14 from 12
> - add testcases (Jiri Olsa)
> - instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
>
> Menglong Dong (3):
> bpf, x86: allow function arguments up to 12 for TRACING
> bpf, x86: clean garbage value in the stack of trampoline
> selftests/bpf: add testcase for FENTRY/FEXIT with 6+ arguments
>
> arch/x86/net/bpf_jit_comp.c | 145 ++++++++++++++++--
> .../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++-
> .../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
> .../selftests/bpf/prog_tests/fentry_test.c | 2 +
> .../selftests/bpf/prog_tests/fexit_test.c | 2 +
> .../testing/selftests/bpf/progs/fentry_test.c | 33 ++++
> .../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++
> 7 files changed, 245 insertions(+), 17 deletions(-)
Also replace rebase on top of bpf-next. Patch 3 cannot be applied.
On 6/9/23 2:56 AM, [email protected] wrote:
> From: Menglong Dong <[email protected]>
>
> Add test9/test10 in fexit_test.c and fentry_test.c to test the fentry
> and fexit whose target function have 7/12 arguments.
>
> Correspondingly, add bpf_testmod_fentry_test7() and
> bpf_testmod_fentry_test12() to bpf_testmod.c
>
> And the testcases passed:
>
> ./test_progs -t fexit
> Summary: 5/12 PASSED, 0 SKIPPED, 0 FAILED
>
> ./test_progs -t fentry
> Summary: 3/0 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Menglong Dong <[email protected]>
> ---
> v4:
> - use different type for args in bpf_testmod_fentry_test{7,12}
> - add testcase for grabage values in ctx
> v3:
> - move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
> bpf_testmod_fentry_test{7,12} meanwhile
> - get return value by bpf_get_func_ret() in
> "fexit/bpf_testmod_fentry_test12", as we don't change ___bpf_ctx_cast()
> in this version
> ---
> .../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++++++-
> .../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
> .../selftests/bpf/prog_tests/fentry_test.c | 2 +
> .../selftests/bpf/prog_tests/fexit_test.c | 2 +
> .../testing/selftests/bpf/progs/fentry_test.c | 33 +++++++++++
> .../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++++++++++++++
> 6 files changed, 115 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> index cf216041876c..66615fdbe3df 100644
> --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> @@ -191,6 +191,19 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
> return a + b + c;
> }
>
> +noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
> + void *e, u64 f, u64 g)
> +{
> + return a + (long)b + c + d + (long)e + f + g;
> +}
> +
> +noinline int bpf_testmod_fentry_test12(u64 a, void *b, short c, int d,
> + void *e, u64 f, u64 g, u64 h,
> + u64 i, u64 j, u64 k, u64 l)
> +{
> + return a + (long)b + c + d + (long)e + f + g + h + i + j + k + l;
> +}
It would be great to add a couple cases with struct arguments
where each struct has 8 < struct_size <= 16.
> __diag_pop();
>
> int bpf_testmod_fentry_ok;
> @@ -245,7 +258,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
>
> if (bpf_testmod_fentry_test1(1) != 2 ||
> bpf_testmod_fentry_test2(2, 3) != 5 ||
> - bpf_testmod_fentry_test3(4, 5, 6) != 15)
> + bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
> + bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
> + 21, 22) != 133 ||
> + bpf_testmod_fentry_test12(16, (void *)17, 18, 19, (void *)20,
> + 21, 22, 23, 24, 25, 26, 27) != 258)
> goto out;
>
> bpf_testmod_fentry_ok = 1;
[...]
> diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
> index 8f1ccb7302e1..a6d8e03ff5b7 100644
> --- a/tools/testing/selftests/bpf/progs/fexit_test.c
> +++ b/tools/testing/selftests/bpf/progs/fexit_test.c
> @@ -78,3 +78,60 @@ int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
> test8_result = 1;
> return 0;
> }
> +
> +__u64 test9_result = 0;
> +SEC("fexit/bpf_testmod_fentry_test7")
> +int BPF_PROG(test9, __u64 a, void *b, short c, int d, void *e, char f,
> + int g, int ret)
> +{
> + test9_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
> + e == (void *)20 && f == 21 && g == 22 && ret == 133;
> + return 0;
> +}
> +
> +__u64 test10_result = 0;
> +SEC("fexit/bpf_testmod_fentry_test12")
> +int BPF_PROG(test10, __u64 a, void *b, short c, int d, void *e, char f,
> + int g, unsigned int h, long i, __u64 j, unsigned long k,
> + unsigned char l)
> +{
> + __u64 ret;
> + int err;
> +
> + /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
> + * accessed yet. So we get the return value by bpf_get_func_ret()
> + * for now.
> + */
> + err = bpf_get_func_ret(ctx, &ret);
Maybe just have 11 arguments for this test case?
> + if (err)
> + return 0;
> +
> + test10_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
> + e == (void *)20 && f == 21 && g == 22 && h == 23 &&
> + i == 24 && j == 25 && k == 26 && l == 27 &&
> + (int)ret == 258;
> + return 0;
> +}
> +
> +__u64 test11_result = 0;
> +SEC("fexit/bpf_testmod_fentry_test12")
> +int BPF_PROG(test11, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
> + __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 l)
> +{
> + __u64 ret;
> + int err;
> +
> + /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
> + * accessed yet. So we get the return value by bpf_get_func_ret()
> + * for now.
> + */
> + err = bpf_get_func_ret(ctx, &ret);
> + if (err)
> + return 0;
> +
> + test11_result = a == 16 && b == 17 && c == 18 && d == 19 &&
> + e == 20 && f == 21 && g == 22 && h == 23 &&
> + i == 24 && j == 25 && k == 26 && l == 27 &&
> + ret == 258;
> + return 0;
> +}
On Sat, Jun 10, 2023 at 11:20 AM Yonghong Song <[email protected]> wrote:
>
>
>
> On 6/9/23 2:56 AM, [email protected] wrote:
> > From: Menglong Dong <[email protected]>
> >
> > There are garbage values in upper bytes when we store the arguments
> > into stack in save_regs() if the size of the argument less then 8.
> >
> > As we already reserve 8 byte for the arguments in regs and stack,
> > it is ok to store/restore the regs in BPF_DW size. Then, the garbage
> > values in upper bytes will be cleaned.
> >
> > Signed-off-by: Menglong Dong <[email protected]>
> > ---
> > v4:
> > - clean grabage value when argument count is 7
> > ---
> > arch/x86/net/bpf_jit_comp.c | 45 ++++++++++++++++++++++++++-----------
> > 1 file changed, 32 insertions(+), 13 deletions(-)
> >
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index a767e13c8c85..f6f51a5d14db 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1857,6 +1857,28 @@ st: if (is_imm8(insn->off))
> > return proglen;
> > }
> >
> > +static inline void clean_garbage(u8 **pprog, int nr_regs, int stack_size,
> > + int arg_size)
> > +{
> > + u8 *prog;
> > +
> > + /* clean potential garbage values in upper 32-bit. 'stack_size'
> > + * here is the offset of the 7th argument on-stack.
>
> Here, we have a huge assumption that if only 7 registers, compiler might
> just allocate a 8-byte stack space and write the value to it. If the
> type of the value is <= 32bit, a 32bit store will be on the stack.
> So in this case, the upper 32bit needs to be cleared.
> If the number of arguments (in terms of number of registers) is more
> than 7, the extra arguments will be 'pushed' to the stack, so there
> is no garbage. This could be true. But please add enough details
> here so people knows why we special case this particular instance.
>
Yeah, this indeed is a huge assumption. I'll add more
comments here to help others understand this case.
Thanks!
Menglong Dong
> > + */
> > + if (nr_regs == 7 && arg_size <= 4) {
> > + int off = -(stack_size - 4);
> > +
> > + prog = *pprog;
> > + /* mov DWORD PTR [rbp + off], 0 */
> > + if (!is_imm8(off))
> > + EMIT2_off32(0xC7, 0x85, off);
> > + else
> > + EMIT3(0xC7, 0x45, off);
> > + EMIT(0, 4);
> > + *pprog = prog;
> > + }
> > +}
> > +
> > static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > int stack_size)
> > {
> > @@ -1878,8 +1900,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> >
> > if (i <= 5) {
> > /* copy function arguments from regs into stack */
> > - emit_stx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_FP,
> > + emit_stx(prog, BPF_DW, BPF_REG_FP,
> > i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> > -(stack_size - i * 8));
> > } else {
> > @@ -1893,17 +1914,16 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > * 8(return addr of the caller)
> > * which means: rbp + 24
> > */
> > - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_0, BPF_REG_FP,
> > + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
> > (i - 6) * 8 + 0x18);
> > - emit_stx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_FP,
> > - BPF_REG_0,
> > + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
> > -(stack_size - i * 8));
> > }
> >
> > j = next_same_struct ? j : j + 1;
> > }
> > +
> > + clean_garbage(prog, nr_regs, stack_size - 6 * 8, arg_size);
> > }
> >
> > static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > @@ -1925,7 +1945,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > next_same_struct = !next_same_struct;
> > }
> >
> > - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> > + emit_ldx(prog, BPF_DW,
> > i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> > BPF_REG_FP,
> > -(stack_size - i * 8));
> > @@ -1956,17 +1976,16 @@ static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
> > }
> >
> > if (i > 5) {
> > - emit_ldx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_0, BPF_REG_FP,
> > + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
> > (i - 6) * 8 + 0x18);
> > - emit_stx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_FP,
> > - BPF_REG_0,
> > + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
> > -(stack_size - (i - 6) * 8));
> > }
> >
> > j = next_same_struct ? j : j + 1;
> > }
> > +
> > + clean_garbage(prog, nr_regs, stack_size, arg_size);
> > }
> >
> > static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
On Sat, Jun 10, 2023 at 11:51 AM Yonghong Song <[email protected]> wrote:
>
>
>
> On 6/9/23 2:56 AM, [email protected] wrote:
> > From: Menglong Dong <[email protected]>
> >
> > For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
> > on the kernel functions whose arguments count less than 6. This is not
> > friendly at all, as too many functions have arguments count more than 6.
> > According to the current kernel version, below is a statistics of the
> > function arguments count:
> >
> > argument count | function count
> > 7 | 704
> > 8 | 270
> > 9 | 84
> > 10 | 47
> > 11 | 47
> > 12 | 27
> > 13 | 22
> > 14 | 5
> > 15 | 0
> > 16 | 1
> >
> > Therefore, let's enhance it by increasing the function arguments count
> > allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
> >
> > In the 1st patch, we make arch_prepare_bpf_trampoline() support to copy
> > function arguments in stack for x86 arch. Therefore, the maximum
> > arguments can be up to MAX_BPF_FUNC_ARGS for FENTRY and FEXIT.
> >
> > In the 2nd patch, we clean garbage value in upper bytes of the trampoline
> > when we store the arguments from regs or on-stack into stack.
> >
> > And the 3rd patches are for the testcases of the 1st patch.
> >
> > Changes since v3:
> > - try make the stack pointer 16-byte aligned. Not sure if I'm right :)
> > - introduce clean_garbage() to clean the grabage when argument count is 7
> > - use different data type in bpf_testmod_fentry_test{7,12}
> > - add testcase for grabage values in ctx
> >
> > Changes since v2:
> > - keep MAX_BPF_FUNC_ARGS still
> > - clean garbage value in upper bytes in the 2nd patch
> > - move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
> > bpf_testmod_fentry_test{7,12} meanwhile in the 3rd patch
> >
> > Changes since v1:
> > - change the maximun function arguments to 14 from 12
> > - add testcases (Jiri Olsa)
> > - instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
> >
> > Menglong Dong (3):
> > bpf, x86: allow function arguments up to 12 for TRACING
> > bpf, x86: clean garbage value in the stack of trampoline
> > selftests/bpf: add testcase for FENTRY/FEXIT with 6+ arguments
> >
> > arch/x86/net/bpf_jit_comp.c | 145 ++++++++++++++++--
> > .../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++-
> > .../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
> > .../selftests/bpf/prog_tests/fentry_test.c | 2 +
> > .../selftests/bpf/prog_tests/fexit_test.c | 2 +
> > .../testing/selftests/bpf/progs/fentry_test.c | 33 ++++
> > .../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++
> > 7 files changed, 245 insertions(+), 17 deletions(-)
>
> Also replace rebase on top of bpf-next. Patch 3 cannot be applied.
Okay!
On Sat, Jun 10, 2023 at 11:05 AM Yonghong Song <[email protected]> wrote:
>
>
>
> On 6/9/23 2:56 AM, [email protected] wrote:
> > From: Menglong Dong <[email protected]>
> >
> > For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used
> > on the kernel functions whose arguments count less than 6. This is not
> > friendly at all, as too many functions have arguments count more than 6.
> >
> > According to the current kernel version, below is a statistics of the
> > function arguments count:
> >
> > argument count | function count
> > 7 | 704
> > 8 | 270
> > 9 | 84
> > 10 | 47
> > 11 | 47
> > 12 | 27
> > 13 | 22
> > 14 | 5
> > 15 | 0
> > 16 | 1
> >
> > Therefore, let's enhance it by increasing the function arguments count
> > allowed in arch_prepare_bpf_trampoline(), for now, only x86_64.
> >
> > For the case that we don't need to call origin function, which means
> > without BPF_TRAMP_F_CALL_ORIG, we need only copy the function arguments
> > that stored in the frame of the caller to current frame. The arguments
> > of arg6-argN are stored in "$rbp + 0x18", we need copy them to
> > "$rbp - regs_off + (6 * 8)".
> >
> > For the case with BPF_TRAMP_F_CALL_ORIG, we need prepare the arguments
> > in stack before call origin function, which means we need alloc extra
> > "8 * (arg_count - 6)" memory in the top of the stack. Note, there should
> > not be any data be pushed to the stack before call the origin function.
> > Then, we have to store rbx with 'mov' instead of 'push'.
> >
> > We use EMIT3_off32() or EMIT4() for "lea" and "sub". The range of the
> > imm in "lea" and "sub" is [-128, 127] if EMIT4() is used. Therefore,
> > we use EMIT3_off32() instead if the imm out of the range.
> >
> > It works well for the FENTRY/FEXIT/MODIFY_RETURN, I'm not sure if there
> > are other complicated cases.
>
> Just remove 'I'm not sure if there are other complicated cases'.
> Since MODIFY_RETURN is mentioned. It would be great if you can add
> a test for MODIFY_RETURN.
Right, I'll add corresponding test cases for MODIFY_RETURN.
>
> >
> > Signed-off-by: Menglong Dong <[email protected]>
> > ---
> > v4:
> > - make the stack 16-byte aligned if passing args on-stack is needed
> > - add the function arguments statistics to the commit log
> > v3:
> > - use EMIT3_off32() for "lea" and "sub" only on necessary
> > - make 12 as the maximum arguments count
> > v2:
> > - instead EMIT4 with EMIT3_off32 for "lea" to prevent overflow
> > - make MAX_BPF_FUNC_ARGS as the maximum argument count
> > ---
> > arch/x86/net/bpf_jit_comp.c | 125 ++++++++++++++++++++++++++++++++----
> > 1 file changed, 111 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 1056bbf55b17..a767e13c8c85 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1868,7 +1868,7 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > * mov QWORD PTR [rbp-0x10],rdi
> > * mov QWORD PTR [rbp-0x8],rsi
> > */
> > - for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
> > + for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
> > /* The arg_size is at most 16 bytes, enforced by the verifier. */
> > arg_size = m->arg_size[j];
> > if (arg_size > 8) {
> > @@ -1876,10 +1876,31 @@ static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > next_same_struct = !next_same_struct;
> > }
> >
> > - emit_stx(prog, bytes_to_bpf_size(arg_size),
> > - BPF_REG_FP,
> > - i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> > - -(stack_size - i * 8));
> > + if (i <= 5) {
> > + /* copy function arguments from regs into stack */
> > + emit_stx(prog, bytes_to_bpf_size(arg_size),
> > + BPF_REG_FP,
> > + i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
> > + -(stack_size - i * 8));
> > + } else {
> > + /* copy function arguments from origin stack frame
> > + * into current stack frame.
> > + *
> > + * The starting address of the arguments on-stack
> > + * is:
> > + * rbp + 8(push rbp) +
> > + * 8(return addr of origin call) +
> > + * 8(return addr of the caller)
> > + * which means: rbp + 24
> > + */
> > + emit_ldx(prog, bytes_to_bpf_size(arg_size),
> > + BPF_REG_0, BPF_REG_FP,
> > + (i - 6) * 8 + 0x18);
> > + emit_stx(prog, bytes_to_bpf_size(arg_size),
> > + BPF_REG_FP,
> > + BPF_REG_0,
> > + -(stack_size - i * 8));
> > + }
>
> I think we have a corner case which does not work for the above.
>
> $ cat t.c
> struct t {
> long a, b;
> };
>
> void foo2(int a, int b, int c, int d, int e, struct t);
> void bar(struct t arg) {
> foo2(1, 2, 3, 4, 5, arg);
> }
> $ cat run.sh
> clang -O2 -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -c t.c
> $ ./run.sh
> $ llvm-objdump -d t.o
>
> t.o: file format elf64-x86-64
>
> Disassembly of section .text:
>
> 0000000000000000 <bar>:
> 0: 48 83 ec 18 subq $0x18, %rsp
> 4: 48 89 f0 movq %rsi, %rax
> 7: 49 89 f9 movq %rdi, %r9
> a: 48 89 7c 24 08 movq %rdi, 0x8(%rsp)
> f: 48 89 74 24 10 movq %rsi, 0x10(%rsp)
> 14: bf 01 00 00 00 movl $0x1, %edi
> 19: be 02 00 00 00 movl $0x2, %esi
> 1e: ba 03 00 00 00 movl $0x3, %edx
> 23: b9 04 00 00 00 movl $0x4, %ecx
> 28: 41 b8 05 00 00 00 movl $0x5, %r8d
> 2e: 50 pushq %rax
> 2f: 41 51 pushq %r9
> 31: e8 00 00 00 00 callq 0x36 <bar+0x36>
> 36: 48 83 c4 28 addq $0x28, %rsp
> 3a: c3 retq
> $
>
> In this particular case, there is a struct argument (16-bytes).
> Only 5 registers are used to pass arguments instead of normal 6.
> The struct parameter is put on the stack. Basically struct
> members should be all in register or all on the stack.
>
You are right! I tested on both clang and gcc, and verified the
rule "struct members should be all in register or all on the stack".
> Not sure whether the kernel code contains similar instances
> or not (not fully using 6 registers while some parameters on stack).
> If not, I guess we do not need to support the above pattern.
>
I think we'd better support it to avoid any error
in the feature. I'll add this part in the next version.
Thanks!
Menglong Dong
> >
> > j = next_same_struct ? j : j + 1;
> > }
> > @@ -1913,6 +1934,41 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
> > }
> > }
> >
> > +static void prepare_origin_stack(const struct btf_func_model *m, u8 **prog,
> > + int nr_regs, int stack_size)
> > +{
> > + int i, j, arg_size;
> > + bool next_same_struct = false;
> > +
> > + if (nr_regs <= 6)
> > + return;
> > +
> > + /* Prepare the function arguments in stack before call origin
> > + * function. These arguments must be stored in the top of the
> > + * stack.
> > + */
> > + for (i = 0, j = 0; i < min(nr_regs, MAX_BPF_FUNC_ARGS); i++) {
> > + /* The arg_size is at most 16 bytes, enforced by the verifier. */
> > + arg_size = m->arg_size[j];
> > + if (arg_size > 8) {
> > + arg_size = 8;
> > + next_same_struct = !next_same_struct;
> > + }
> > +
> > + if (i > 5) {
> > + emit_ldx(prog, bytes_to_bpf_size(arg_size),
> > + BPF_REG_0, BPF_REG_FP,
> > + (i - 6) * 8 + 0x18);
> > + emit_stx(prog, bytes_to_bpf_size(arg_size),
> > + BPF_REG_FP,
> > + BPF_REG_0,
> > + -(stack_size - (i - 6) * 8));
> > + }
> > +
> > + j = next_same_struct ? j : j + 1;
> > + }
> > +}
> > +
> > static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> > struct bpf_tramp_link *l, int stack_size,
> > int run_ctx_off, bool save_ret)
> > @@ -1938,7 +1994,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> > /* arg1: mov rdi, progs[i] */
> > emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> > /* arg2: lea rsi, [rbp - ctx_cookie_off] */
> > - EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
> > + if (!is_imm8(-run_ctx_off))
> > + EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
> > + else
> > + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
> >
> > if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
> > return -EINVAL;
> > @@ -1954,7 +2013,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> > emit_nops(&prog, 2);
> >
> > /* arg1: lea rdi, [rbp - stack_size] */
> > - EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> > + if (!is_imm8(-stack_size))
> > + EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
> > + else
> > + EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> > /* arg2: progs[i]->insnsi for interpreter */
> > if (!p->jited)
> > emit_mov_imm64(&prog, BPF_REG_2,
> > @@ -1984,7 +2046,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> > /* arg2: mov rsi, rbx <- start time in nsec */
> > emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> > /* arg3: lea rdx, [rbp - run_ctx_off] */
> > - EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
> > + if (!is_imm8(-run_ctx_off))
> > + EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
> > + else
> > + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
> > if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
> > return -EINVAL;
> >
> > @@ -2136,7 +2201,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> > void *func_addr)
> > {
> > int i, ret, nr_regs = m->nr_args, stack_size = 0;
> > - int regs_off, nregs_off, ip_off, run_ctx_off;
> > + int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> > struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> > struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> > struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> > @@ -2150,8 +2215,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> > if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
> > nr_regs += (m->arg_size[i] + 7) / 8 - 1;
> >
> > - /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
> > - if (nr_regs > 6)
> > + /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
> > + * are passed through regs, the remains are through stack.
> > + */
> > + if (nr_regs > MAX_BPF_FUNC_ARGS)
> > return -ENOTSUPP;
> >
> > /* Generated trampoline stack layout:
> > @@ -2170,7 +2237,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> > *
> > * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
> > *
> > + * RBP - rbx_off [ rbx value ] always
> > + *
> > * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
> > + *
> > + * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
> > + * [ ... ]
> > + * [ stack_arg2 ]
> > + * RBP - arg_stack_off [ stack_arg1 ]
> > */
> >
> > /* room for return value of orig_call or fentry prog */
> > @@ -2190,9 +2264,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
> >
> > ip_off = stack_size;
> >
> > + stack_size += 8;
> > + rbx_off = stack_size;
> > +
> > stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
> > run_ctx_off = stack_size;
> >
> > + if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
> > + stack_size += (nr_regs - 6) * 8;
> > + /* make sure the stack pointer is 16-byte aligned if we
> > + * need pass arguments on stack, which means
> > + * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
> > + * should be 16-byte aligned. Following code depend on
> > + * that stack_size is already 8-byte aligned.
> > + */
> > + stack_size += (stack_size % 16) ? 0 : 8;
>
> I think this is correct.
>
> > + }
> > +
> > + arg_stack_off = stack_size;
> > +
> [...]
On Sat, Jun 10, 2023 at 11:29 AM Yonghong Song <[email protected]> wrote:
>
>
>
> On 6/9/23 2:56 AM, [email protected] wrote:
> > From: Menglong Dong <[email protected]>
> >
> > Add test9/test10 in fexit_test.c and fentry_test.c to test the fentry
> > and fexit whose target function have 7/12 arguments.
> >
> > Correspondingly, add bpf_testmod_fentry_test7() and
> > bpf_testmod_fentry_test12() to bpf_testmod.c
> >
> > And the testcases passed:
> >
> > ./test_progs -t fexit
> > Summary: 5/12 PASSED, 0 SKIPPED, 0 FAILED
> >
> > ./test_progs -t fentry
> > Summary: 3/0 PASSED, 0 SKIPPED, 0 FAILED
> >
> > Signed-off-by: Menglong Dong <[email protected]>
> > ---
> > v4:
> > - use different type for args in bpf_testmod_fentry_test{7,12}
> > - add testcase for grabage values in ctx
> > v3:
> > - move bpf_fentry_test{7,12} to bpf_testmod.c and rename them to
> > bpf_testmod_fentry_test{7,12} meanwhile
> > - get return value by bpf_get_func_ret() in
> > "fexit/bpf_testmod_fentry_test12", as we don't change ___bpf_ctx_cast()
> > in this version
> > ---
> > .../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 ++++++-
> > .../selftests/bpf/prog_tests/fentry_fexit.c | 4 +-
> > .../selftests/bpf/prog_tests/fentry_test.c | 2 +
> > .../selftests/bpf/prog_tests/fexit_test.c | 2 +
> > .../testing/selftests/bpf/progs/fentry_test.c | 33 +++++++++++
> > .../testing/selftests/bpf/progs/fexit_test.c | 57 +++++++++++++++++++
> > 6 files changed, 115 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > index cf216041876c..66615fdbe3df 100644
> > --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > @@ -191,6 +191,19 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
> > return a + b + c;
> > }
> >
> > +noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
> > + void *e, u64 f, u64 g)
> > +{
> > + return a + (long)b + c + d + (long)e + f + g;
> > +}
> > +
> > +noinline int bpf_testmod_fentry_test12(u64 a, void *b, short c, int d,
> > + void *e, u64 f, u64 g, u64 h,
> > + u64 i, u64 j, u64 k, u64 l)
> > +{
> > + return a + (long)b + c + d + (long)e + f + g + h + i + j + k + l;
> > +}
>
> It would be great to add a couple cases with struct arguments
> where each struct has 8 < struct_size <= 16.
Good idea. And I'll add extra test cases for the case
you mentioned before.
> > __diag_pop();
> >
> > int bpf_testmod_fentry_ok;
> > @@ -245,7 +258,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
> >
> > if (bpf_testmod_fentry_test1(1) != 2 ||
> > bpf_testmod_fentry_test2(2, 3) != 5 ||
> > - bpf_testmod_fentry_test3(4, 5, 6) != 15)
> > + bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
> > + bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
> > + 21, 22) != 133 ||
> > + bpf_testmod_fentry_test12(16, (void *)17, 18, 19, (void *)20,
> > + 21, 22, 23, 24, 25, 26, 27) != 258)
> > goto out;
> >
> > bpf_testmod_fentry_ok = 1;
> [...]
> > diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
> > index 8f1ccb7302e1..a6d8e03ff5b7 100644
> > --- a/tools/testing/selftests/bpf/progs/fexit_test.c
> > +++ b/tools/testing/selftests/bpf/progs/fexit_test.c
> > @@ -78,3 +78,60 @@ int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
> > test8_result = 1;
> > return 0;
> > }
> > +
> > +__u64 test9_result = 0;
> > +SEC("fexit/bpf_testmod_fentry_test7")
> > +int BPF_PROG(test9, __u64 a, void *b, short c, int d, void *e, char f,
> > + int g, int ret)
> > +{
> > + test9_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
> > + e == (void *)20 && f == 21 && g == 22 && ret == 133;
> > + return 0;
> > +}
> > +
> > +__u64 test10_result = 0;
> > +SEC("fexit/bpf_testmod_fentry_test12")
> > +int BPF_PROG(test10, __u64 a, void *b, short c, int d, void *e, char f,
> > + int g, unsigned int h, long i, __u64 j, unsigned long k,
> > + unsigned char l)
> > +{
> > + __u64 ret;
> > + int err;
> > +
> > + /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
> > + * accessed yet. So we get the return value by bpf_get_func_ret()
> > + * for now.
> > + */
> > + err = bpf_get_func_ret(ctx, &ret);
>
> Maybe just have 11 arguments for this test case?
>
> > + if (err)
> > + return 0;
> > +
> > + test10_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
> > + e == (void *)20 && f == 21 && g == 22 && h == 23 &&
> > + i == 24 && j == 25 && k == 26 && l == 27 &&
> > + (int)ret == 258;
> > + return 0;
> > +}
> > +
> > +__u64 test11_result = 0;
> > +SEC("fexit/bpf_testmod_fentry_test12")
> > +int BPF_PROG(test11, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
> > + __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 l)
> > +{
> > + __u64 ret;
> > + int err;
> > +
> > + /* BPF_PROG() don't support 14 arguments, and ctx[12] can't be
> > + * accessed yet. So we get the return value by bpf_get_func_ret()
> > + * for now.
> > + */
> > + err = bpf_get_func_ret(ctx, &ret);
> > + if (err)
> > + return 0;
> > +
> > + test11_result = a == 16 && b == 17 && c == 18 && d == 19 &&
> > + e == 20 && f == 21 && g == 22 && h == 23 &&
> > + i == 24 && j == 25 && k == 26 && l == 27 &&
> > + ret == 258;
> > + return 0;
> > +}