2023-11-30 13:44:16

by Peter Zijlstra

Subject: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

The current BPF calling convention is __nocfi, except when it calls !JIT things,
in which case it calls regular C functions.

It so happens that with FineIBT the __nocfi and C calling conventions are
incompatible. Specifically, __nocfi will call at func+0, while FineIBT will have
endbr-poison there, which is not a valid indirect target, causing #CP.

Notably, this only triggers on IBT-enabled hardware, which is probably why this
hasn't been reported (also, most people will have the JIT on anyway).

Implement proper CFI prologues for the BPF JIT codegen and drop __nocfi for
x86.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/cfi.h | 94 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/alternative.c | 47 +++++++++++++++---
arch/x86/net/bpf_jit_comp.c | 108 +++++++++++++++++++++++++++++++++++++-----
include/linux/bpf.h | 12 +++-
kernel/bpf/core.c | 20 +++++++
5 files changed, 260 insertions(+), 21 deletions(-)

--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -9,15 +9,109 @@
*/
#include <linux/bug.h>

+/*
+ * An overview of the various calling conventions...
+ *
+ * Traditional:
+ *
+ * foo:
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * IBT:
+ *
+ * foo:
+ * endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * kCFI:
+ *
+ * __cfi_foo:
+ * movl $0x12345678, %eax
+ * # 11 nops when CONFIG_CALL_PADDING
+ * foo:
+ * endbr64 # when IBT
+ * ... code here ...
+ * ret
+ *
+ * direct call:
+ * call foo # / call foo+4 when IBT
+ *
+ * indirect call:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d # -15 when CONFIG_CALL_PADDING
+ * jz 1f
+ * ud2
+ * 1:call *%r11
+ *
+ *
+ * FineIBT (builds as kCFI + CALL_PADDING + IBT + RETPOLINE and runtime patches into):
+ *
+ * __cfi_foo:
+ * endbr64
+ * subl 0x12345678, %r10d
+ * jz foo
+ * ud2
+ * nop
+ * foo:
+ * osp nop3 # was endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $0x12345678, %r10d
+ * subl $16, %r11
+ * nop4
+ * call *%r11
+ *
+ */
+enum cfi_mode {
+ CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+ CFI_OFF, /* Traditional / IBT depending on .config */
+ CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */
+ CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
+};
+
+extern enum cfi_mode cfi_mode;
+
struct pt_regs;

#ifdef CONFIG_CFI_CLANG
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+extern u32 cfi_bpf_hash;
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
+#define cfi_bpf_hash 0U
#endif /* CONFIG_CFI_CLANG */

#endif /* _ASM_X86_CFI_H */
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -30,6 +30,7 @@
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
+#include <asm/cfi.h>

int __read_mostly alternatives_patched;

@@ -832,15 +833,43 @@ void __init_or_module apply_seal_endbr(s
#endif /* CONFIG_X86_KERNEL_IBT */

#ifdef CONFIG_FINEIBT
+#define __CFI_DEFAULT CFI_DEFAULT
+#elif defined(CONFIG_CFI_CLANG)
+#define __CFI_DEFAULT CFI_KCFI
+#else
+#define __CFI_DEFAULT CFI_OFF
+#endif

-enum cfi_mode {
- CFI_DEFAULT,
- CFI_OFF,
- CFI_KCFI,
- CFI_FINEIBT,
-};
+enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+
+#ifdef CONFIG_CFI_CLANG
+struct bpf_insn;
+
+/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
+extern unsigned int __bpf_prog_runX(const void *ctx,
+ const struct bpf_insn *insn);
+
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typeid.
+ */
+__ADDRESSABLE(__bpf_prog_runX);
+
+/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_hash,@object \n"
+" .globl cfi_bpf_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_hash: \n"
+" .long __kcfi_typeid___bpf_prog_runX \n"
+" .size cfi_bpf_hash, 4 \n"
+" .popsection \n"
+);
+#endif
+
+#ifdef CONFIG_FINEIBT

-static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;

@@ -1149,8 +1178,10 @@ static void __apply_fineibt(s32 *start_r
goto err;

if (cfi_rand) {
- if (builtin)
+ if (builtin) {
cfi_seed = get_random_u32();
+ cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
+ }

ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -17,6 +17,7 @@
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/unwind.h>
+#include <asm/cfi.h>

static bool all_callee_regs_used[4] = {true, true, true, true};

@@ -51,9 +52,11 @@ static u8 *emit_code(u8 *ptr, u32 bytes,
do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

#ifdef CONFIG_X86_KERNEL_IBT
-#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
+#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
+#define EMIT_ENDBR_POISON() EMIT(gen_endbr_poison(), 4)
#else
#define EMIT_ENDBR()
+#define EMIT_ENDBR_POISON()
#endif

static bool is_imm8(int value)
@@ -247,6 +250,7 @@ struct jit_context {
*/
int tail_call_direct_label;
int tail_call_indirect_label;
+ int prog_offset;
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -305,20 +309,90 @@ static void pop_callee_regs(u8 **pprog,
}

/*
+ * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
+ * in arch/x86/kernel/alternative.c
+ */
+
+static int emit_fineibt(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ EMIT_ENDBR();
+ EMIT3_off32(0x41, 0x81, 0xea, cfi_bpf_hash); /* subl $hash, %r10d */
+ EMIT2(0x74, 0x07); /* jz.d8 +7 */
+ EMIT2(0x0f, 0x0b); /* ud2 */
+ EMIT1(0x90); /* nop */
+ EMIT_ENDBR_POISON();
+
+ *pprog = prog;
+ return 16;
+}
+
+static int emit_kcfi(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int offset = 5;
+
+ EMIT1_off32(0xb8, cfi_bpf_hash); /* movl $hash, %eax */
+#ifdef CONFIG_CALL_PADDING
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ offset += 11;
+#endif
+ EMIT_ENDBR();
+
+ *pprog = prog;
+ return offset;
+}
+
+static int emit_cfi(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int offset = 0;
+
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ offset = emit_fineibt(&prog);
+ break;
+
+ case CFI_KCFI:
+ offset = emit_kcfi(&prog);
+ break;
+
+ default:
+ EMIT_ENDBR();
+ break;
+ }
+
+ *pprog = prog;
+ return offset;
+}
+
+/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
- bool tail_call_reachable, bool is_subprog,
- bool is_exception_cb)
+static int emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+ bool tail_call_reachable, bool is_subprog,
+ bool is_exception_cb)
{
u8 *prog = *pprog;
+ int offset;

+ offset = emit_cfi(&prog);
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
- EMIT_ENDBR();
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
if (!ebpf_from_cbpf) {
@@ -357,6 +431,8 @@ static void emit_prologue(u8 **pprog, u3
if (tail_call_reachable)
EMIT1(0x50); /* push rax */
*pprog = prog;
+
+ return offset;
}

static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
@@ -1083,8 +1159,8 @@ static int do_jit(struct bpf_prog *bpf_p
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, excnt = 0;
int ilen, proglen = 0;
+ int i, excnt = 0;
u8 *prog = temp;
int err;

@@ -1094,9 +1170,12 @@ static int do_jit(struct bpf_prog *bpf_p
/* tail call's presence in current prog implies it is reachable */
tail_call_reachable |= tail_call_seen;

- emit_prologue(&prog, bpf_prog->aux->stack_depth,
- bpf_prog_was_classic(bpf_prog), tail_call_reachable,
- bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+ ctx->prog_offset = emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog),
+ tail_call_reachable,
+ bpf_is_subprog(bpf_prog),
+ bpf_prog->aux->exception_cb);
+
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -2935,9 +3014,16 @@ struct bpf_prog *bpf_int_jit_compile(str
jit_data->header = header;
jit_data->rw_header = rw_header;
}
- prog->bpf_func = (void *)image;
+ /*
+ * ctx.prog_offset is used when CFI preambles put code *before*
+ * the function. See emit_cfi(). For FineIBT specifically this code
+ * can also be executed and bpf_prog_kallsyms_add() will
+ * generate an additional symbol to cover this, hence also
+ * decrement proglen.
+ */
+ prog->bpf_func = (void *)image + ctx.prog_offset;
prog->jited = 1;
- prog->jited_len = proglen;
+ prog->jited_len = proglen - ctx.prog_offset;
} else {
prog = orig_prog;
}
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -29,6 +29,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/static_call.h>
#include <linux/memcontrol.h>
+#include <linux/cfi.h>

struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -1188,7 +1189,11 @@ struct bpf_dispatcher {
#endif
};

-static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
+#ifndef __bpfcall
+#define __bpfcall __nocfi
+#endif
+
+static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
bpf_func_t bpf_func)
@@ -1278,7 +1283,7 @@ int arch_prepare_bpf_dispatcher(void *im

#define DEFINE_BPF_DISPATCHER(name) \
__BPF_DISPATCHER_SC(name); \
- noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \
+ noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func) \
@@ -1426,6 +1431,9 @@ struct bpf_prog_aux {
struct bpf_kfunc_desc_tab *kfunc_tab;
struct bpf_kfunc_btf_tab *kfunc_btf_tab;
u32 size_poke_tab;
+#ifdef CONFIG_FINEIBT
+ struct bpf_ksym ksym_prefix;
+#endif
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -683,6 +683,23 @@ void bpf_prog_kallsyms_add(struct bpf_pr
fp->aux->ksym.prog = true;

bpf_ksym_add(&fp->aux->ksym);
+
+#ifdef CONFIG_FINEIBT
+ /*
+ * When FineIBT, code in the __cfi_foo() symbols can get executed
+ * and hence unwinder needs help.
+ */
+ if (cfi_mode != CFI_FINEIBT)
+ return;
+
+ snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN,
+ "__cfi_%s", fp->aux->ksym.name);
+
+ fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16;
+ fp->aux->ksym_prefix.end = (unsigned long) fp->bpf_func;
+
+ bpf_ksym_add(&fp->aux->ksym_prefix);
+#endif
}

void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@@ -691,6 +708,9 @@ void bpf_prog_kallsyms_del(struct bpf_pr
return;

bpf_ksym_del(&fp->aux->ksym);
+#ifdef CONFIG_FINEIBT
+ bpf_ksym_del(&fp->aux->ksym_prefix);
+#endif
}

static struct bpf_ksym *bpf_ksym_find(unsigned long addr)



2023-12-03 23:03:24

by Alexei Starovoitov

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Thu, Nov 30, 2023 at 5:43 AM Peter Zijlstra <[email protected]> wrote:
>
>
> void bpf_prog_kallsyms_del(struct bpf_prog *fp)
> @@ -691,6 +708,9 @@ void bpf_prog_kallsyms_del(struct bpf_pr
> return;
>
> bpf_ksym_del(&fp->aux->ksym);
> +#ifdef CONFIG_FINEIBT
> + bpf_ksym_del(&fp->aux->ksym_prefix);
> +#endif
> }

Thank you for addressing all comments, but it panics during boot with:

[ 3.109474] RIP: 0010:bpf_prog_kallsyms_del+0x10f/0x140
[ 3.109867] Code: 26 e0 00 ff 05 32 dd dd 01 48 8d bb 80 03 00 00
48 c7 c6 b8 b3 00 83 e8 ef 25 e0 00 48 8b 83 58 03 00 00 48 8b 8b 60
03 00 00 <48> 89 48 08 48 89 01 4c 89 b3 60 03 00 00 48 c7 c7 10 0b 7b
83 5b
[ 3.111282] RSP: 0000:ffffc90000013e08 EFLAGS: 00010246
[ 3.116968] Call Trace:
[ 3.117163] <TASK>
[ 3.117328] ? __die_body+0x68/0xb0
[ 3.117599] ? page_fault_oops+0x317/0x390
[ 3.117909] ? debug_objects_fill_pool+0x19/0x440
[ 3.118283] ? debug_objects_fill_pool+0x19/0x440
[ 3.118715] ? do_user_addr_fault+0x4cd/0x560
[ 3.119045] ? exc_page_fault+0x62/0x1c0
[ 3.119350] ? asm_exc_page_fault+0x26/0x30
[ 3.119675] ? bpf_prog_kallsyms_del+0x10f/0x140
[ 3.120023] ? bpf_prog_kallsyms_del+0x101/0x140
[ 3.120381] __bpf_prog_put_noref+0x12/0xf0
[ 3.120704] bpf_prog_put_deferred+0xe9/0x110
[ 3.121035] bpf_prog_put+0xbb/0xd0
[ 3.121307] bpf_prog_release+0x15/0x20

Adding the following:

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5c84a935ba63..5013fd53adfd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -709,6 +709,8 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)

bpf_ksym_del(&fp->aux->ksym);
#ifdef CONFIG_FINEIBT
+ if (cfi_mode != CFI_FINEIBT)
+ return;
bpf_ksym_del(&fp->aux->ksym_prefix);
#endif
}

fixes the boot issue, but test_progs is not happy.

Just running test_progs it splats right away:

[ 74.047757] kmemleak: Found object by alias at 0xffffffffa0001d80
[ 74.048272] CPU: 14 PID: 104 Comm: kworker/14:0 Tainted: G W
O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
[ 74.049118] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[ 74.050042] Workqueue: events bpf_prog_free_deferred
[ 74.050448] Call Trace:
[ 74.050663] <TASK>
[ 74.050841] dump_stack_lvl+0x55/0x80
[ 74.051141] __find_and_remove_object+0xdb/0x110
[ 74.051521] kmemleak_free+0x41/0x70
[ 74.051828] vfree+0x36/0x130
[ 74.052076] ? process_scheduled_works+0x1d7/0x520
[ 74.052463] bpf_prog_pack_free+0x42/0x1a0
[ 74.052803] ? process_scheduled_works+0x1d7/0x520
[ 74.053194] bpf_jit_binary_pack_free+0x17/0x30
[ 74.053565] bpf_jit_free+0x57/0x90
[ 74.053856] process_scheduled_works+0x250/0x520
[ 74.054234] worker_thread+0x26f/0x400
[ 74.054542] ? __cfi_worker_thread+0x10/0x10
[ 74.054909] kthread+0x113/0x130
[ 74.055178] ? __cfi_kthread+0x10/0x10
[ 74.055487] ret_from_fork+0x48/0x60
[ 74.055793] ? __cfi_kthread+0x10/0x10
[ 74.056102] ret_from_fork_asm+0x11/0x30
[ 74.056427] </TASK>
[ 74.056616] kmemleak: Object 0xffffffffa0000000 (size 2097152):
[ 74.057089] kmemleak: comm "swapper/0", pid 1, jiffies 4294667572
[ 74.057594] kmemleak: min_count = 2
[ 74.057892] kmemleak: count = 2
[ 74.058164] kmemleak: flags = 0x1
[ 74.058448] kmemleak: checksum = 0
[ 74.058746] kmemleak: backtrace:
[ 74.059025] kmemleak_vmalloc+0x2d/0xd0
[ 74.059338] __vmalloc_node_range+0x7e0/0x810
[ 74.059726] module_alloc+0x5f/0x70
[ 74.060015] bpf_prog_pack_alloc+0x167/0x260
[ 74.060374] bpf_jit_binary_pack_alloc+0xca/0x1e0
[ 74.060760] bpf_int_jit_compile+0x3c5d/0x4140
[ 74.061120] bpf_prog_select_runtime+0x239/0x320
[ 74.061496] bpf_prepare_filter+0x49d/0x4c0
[ 74.061844] bpf_prog_create+0x80/0xc0
[ 74.062149] ptp_classifier_init+0x29/0x40
[ 74.062480] sock_init+0x9c/0xb0
[ 74.062753] do_one_initcall+0xdd/0x2f0
[ 74.063067] do_initcall_level+0x98/0x105
[ 74.063394] do_initcalls+0x43/0x80
[ 74.063687] kernel_init_freeable+0x15f/0x1d0
[ 74.064039] kernel_init+0x1a/0x1b0

[ 74.064993] Trying to vfree() bad address (000000001f212011)
[ 74.065625] WARNING: CPU: 14 PID: 104 at mm/vmalloc.c:2692
remove_vm_area+0x141/0x150

[ 74.089515] Trying to vfree() nonexistent vm area (000000001f212011)
[ 74.090234] WARNING: CPU: 14 PID: 104 at mm/vmalloc.c:2827 vfree+0xfe/0x130

[ 74.129930] Trying to vfree() bad address (000000009ed2080e)
[ 74.130408] WARNING: CPU: 14 PID: 149 at mm/vmalloc.c:2692
remove_vm_area+0x141/0x150

and eventually panics with:

[ 74.195676] BUG: unable to handle page fault for address: ffffffffa00020c0
[ 74.196541] #PF: supervisor read access in kernel mode
[ 74.197548] #PF: error_code(0x0000) - not-present page
[ 74.201441] PGD 3058067 P4D 3058067 PUD 3059063 PMD 101d69067 PTE 0
[ 74.202162] Oops: 0000 [#1] PREEMPT SMP PTI
[ 74.202602] CPU: 14 PID: 2151 Comm: kworker/14:5 Tainted: G
W O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
[ 74.203567] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[ 74.204551] Workqueue: events bpf_prog_free_deferred
[ 74.205039] RIP: 0010:bpf_prog_pack_free+0x20/0x1a0
[ 74.205469] Code: 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f
44 00 00 55 41 57 41 56 53 49 89 fe 48 c7 c7 60 0b 7b 83 31 f6 e8 30
a7 e2 00 <41> 8b 0e 48 8b 3d 36 71 6d 02 f3 48 0f b8 c7 48 98 48 c1 e0
15 48
[ 74.207102] RSP: 0018:ffffc900006e7de0 EFLAGS: 00010282
[ 74.214890] Call Trace:
[ 74.215108] <TASK>
[ 74.215305] ? __die_body+0x68/0xb0
[ 74.215620] ? page_fault_oops+0x317/0x390
[ 74.215977] ? do_kern_addr_fault+0x8a/0xb0
[ 74.216351] ? exc_page_fault+0xa0/0x1c0
[ 74.216697] ? asm_exc_page_fault+0x26/0x30
[ 74.217055] ? process_scheduled_works+0x1d7/0x520
[ 74.217481] ? bpf_prog_pack_free+0x20/0x1a0
[ 74.217857] ? process_scheduled_works+0x1d7/0x520
[ 74.218279] bpf_jit_binary_pack_free+0x17/0x30
[ 74.218676] bpf_jit_free+0x57/0x90
[ 74.218983] process_scheduled_works+0x250/0x520
[ 74.219388] worker_thread+0x26f/0x400

The kernel was compiled with:
CONFIG_CC_HAS_SLS=y
CONFIG_CC_HAS_RETURN_THUNK=y
CONFIG_CC_HAS_ENTRY_PADDING=y
CONFIG_FUNCTION_PADDING_CFI=11
CONFIG_FUNCTION_PADDING_BYTES=11
CONFIG_CALL_PADDING=y
CONFIG_FINEIBT=y
CONFIG_HAVE_CALL_THUNKS=y
CONFIG_SPECULATION_MITIGATIONS=y
CONFIG_PAGE_TABLE_ISOLATION=y
CONFIG_RETPOLINE=y
CONFIG_RETHUNK=y
CONFIG_CPU_UNRET_ENTRY=y
...
CONFIG_DEBUG_KMEMLEAK=y
...
CONFIG_CFI_CLANG=y

and 'make LLVM=1', of course.

I suspect the above vmalloc/prog_pack issue is somehow
related to the patches, but I cannot prove it, since without
these CFI fixes it also panics:

[ 29.079722] CFI failure at bpf_for_each_array_elem+0xa6/0x100
(target: bpf_prog_5a19eca5d8e54e9b_check_elem+0x0/0x42; expected type:
0xe37465df)
[ 29.080884] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 29.081244] CPU: 8 PID: 2142 Comm: test_progs Tainted: G
O 6.7.0-rc3-00699-g90679706d486 #5242
[ 29.082662] RIP: 0010:bpf_for_each_array_elem+0xa6/0x100
[ 29.083027] Code: af ef 4c 01 ed 44 89 7c 24 04 4c 89 e7 48 8d 74
24 04 48 89 ea 48 89 d9 45 31 c0 4d 89 f3 41 ba 21 9a 8b 1c 45 03 53
f1 74 02 <0f> 0b 2e e8 62 95 de 00 48 85 c0 75 0e 49 8d 47 01 41 8b 4c
24 24
[ 29.084282] RSP: 0018:ffffc9000269fea8 EFLAGS: 00010286
[ 29.089633] Call Trace:
[ 29.089805] <TASK>
[ 29.089953] ? __die_body+0x68/0xb0
[ 29.090192] ? die+0xa4/0xd0
[ 29.090391] ? do_trap+0xa5/0x180
[ 29.090619] ? bpf_for_each_array_elem+0xa6/0x100
[ 29.090941] ? do_error_trap+0xb6/0x100
[ 29.091200] ? bpf_for_each_array_elem+0xa6/0x100
[ 29.091516] ? bpf_for_each_array_elem+0xa6/0x100
[ 29.091848] ? handle_invalid_op+0x2c/0x40
[ 29.092123] ? bpf_for_each_array_elem+0xa6/0x100
[ 29.092439] ? exc_invalid_op+0x38/0x60
[ 29.092699] ? asm_exc_invalid_op+0x1a/0x20
[ 29.092985] ? 0xffffffffa0000b8c
[ 29.093212] ? 0xffffffffa0000b8c
[ 29.093439] ? bpf_for_each_array_elem+0xa6/0x100
[ 29.093759] ? preempt_count_add+0x5d/0xb0
[ 29.094034] bpf_prog_ca45ea7f9cb8ac1a_inner_map+0x94/0x98
[ 29.094415] bpf_trampoline_6442516600+0x47/0x1000
[ 29.094743] __x64_sys_getpgid+0x9/0x20

which is expected.
In this case test_progs proceeds further before it hits the CFI abort.
With the CFI fixes, the vmalloc panic happens sooner.

Song,

you're an expert in prog_pack logic, please take a look as well.


Peter,

if you're struggling to set up bpf tests locally, feel free
to add an extra patch that adds
CONFIG_FINEIBT=y and others
to tools/testing/selftests/bpf/config.x86_64
and resend.
BPF CI will apply that patch to kconfig while building the kernel
and will run the tests accordingly.
It will be ignored with gcc builds, but clang builds should pick it up.

Or do this:
https://docs.kernel.org/bpf/bpf_devel_QA.html#q-how-do-i-run-bpf-ci-on-my-changes-before-sending-them-out-for-review

It might be easier to test this way.
Same point about extra patch for tools/testing/selftests/bpf/config.x86_64.

2023-12-04 09:14:54

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Sun, Dec 03, 2023 at 02:56:34PM -0800, Alexei Starovoitov wrote:
> On Thu, Nov 30, 2023 at 5:43 AM Peter Zijlstra <[email protected]> wrote:
> >
> >
> > void bpf_prog_kallsyms_del(struct bpf_prog *fp)
> > @@ -691,6 +708,9 @@ void bpf_prog_kallsyms_del(struct bpf_pr
> > return;
> >
> > bpf_ksym_del(&fp->aux->ksym);
> > +#ifdef CONFIG_FINEIBT
> > + bpf_ksym_del(&fp->aux->ksym_prefix);
> > +#endif
> > }
>
> Thank you for addressing all comments, but it panics during boot with:
>
> [ 3.109474] RIP: 0010:bpf_prog_kallsyms_del+0x10f/0x140
> [ 3.109867] Code: 26 e0 00 ff 05 32 dd dd 01 48 8d bb 80 03 00 00
> 48 c7 c6 b8 b3 00 83 e8 ef 25 e0 00 48 8b 83 58 03 00 00 48 8b 8b 60
> 03 00 00 <48> 89 48 08 48 89 01 4c 89 b3 60 03 00 00 48 c7 c7 10 0b 7b
> 83 5b
> [ 3.111282] RSP: 0000:ffffc90000013e08 EFLAGS: 00010246
> [ 3.116968] Call Trace:
> [ 3.117163] <TASK>
> [ 3.117328] ? __die_body+0x68/0xb0
> [ 3.117599] ? page_fault_oops+0x317/0x390
> [ 3.117909] ? debug_objects_fill_pool+0x19/0x440
> [ 3.118283] ? debug_objects_fill_pool+0x19/0x440
> [ 3.118715] ? do_user_addr_fault+0x4cd/0x560
> [ 3.119045] ? exc_page_fault+0x62/0x1c0
> [ 3.119350] ? asm_exc_page_fault+0x26/0x30
> [ 3.119675] ? bpf_prog_kallsyms_del+0x10f/0x140
> [ 3.120023] ? bpf_prog_kallsyms_del+0x101/0x140
> [ 3.120381] __bpf_prog_put_noref+0x12/0xf0
> [ 3.120704] bpf_prog_put_deferred+0xe9/0x110
> [ 3.121035] bpf_prog_put+0xbb/0xd0
> [ 3.121307] bpf_prog_release+0x15/0x20
>
> Adding the following:
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 5c84a935ba63..5013fd53adfd 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -709,6 +709,8 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
>
> bpf_ksym_del(&fp->aux->ksym);
> #ifdef CONFIG_FINEIBT
> + if (cfi_mode != CFI_FINEIBT)
> + return;
> bpf_ksym_del(&fp->aux->ksym_prefix);
> #endif
> }
>
> fixes the boot issue, but test_progs is not happy.

Damn, I'm an idiot :-), I knew I should've boot tested all
configurations again :/

> Just running test_progs it splats right away:
>
> [ 74.047757] kmemleak: Found object by alias at 0xffffffffa0001d80
> [ 74.048272] CPU: 14 PID: 104 Comm: kworker/14:0 Tainted: G W
> O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
> [ 74.049118] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
> [ 74.050042] Workqueue: events bpf_prog_free_deferred
> [ 74.050448] Call Trace:
> [ 74.050663] <TASK>
> [ 74.050841] dump_stack_lvl+0x55/0x80
> [ 74.051141] __find_and_remove_object+0xdb/0x110
> [ 74.051521] kmemleak_free+0x41/0x70
> [ 74.051828] vfree+0x36/0x130

Durr, I'll see if I can get that stuff running locally, and otherwise
play with the robot as you suggested. Thanks!

2023-12-04 11:12:32

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 10:13:34AM +0100, Peter Zijlstra wrote:

> > Just running test_progs it splats right away:
> >
> > [ 74.047757] kmemleak: Found object by alias at 0xffffffffa0001d80
> > [ 74.048272] CPU: 14 PID: 104 Comm: kworker/14:0 Tainted: G W
> > O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
> > [ 74.049118] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
> > [ 74.050042] Workqueue: events bpf_prog_free_deferred
> > [ 74.050448] Call Trace:
> > [ 74.050663] <TASK>
> > [ 74.050841] dump_stack_lvl+0x55/0x80
> > [ 74.051141] __find_and_remove_object+0xdb/0x110
> > [ 74.051521] kmemleak_free+0x41/0x70
> > [ 74.051828] vfree+0x36/0x130
>
> Durr, I'll see if I can get that stuff running locally, and otherwise
> play with the robot as you suggested. Thanks!

I think it is bpf_jit_binary_pack_hdr(), which is using prog->bpf_func
as a start address for the image, instead of jit_data->image.

This used to be true, but now it's offset.

Let me see what to do about that...
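
For context, a simplified sketch of that helper as it looks in kernel/bpf/core.c
around this time (quoted from memory, so treat it as approximate):

struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
{
	unsigned long real_start = (unsigned long)fp->bpf_func;
	unsigned long addr = real_start & BPF_PROG_CHUNK_MASK;

	return (void *)addr;
}

With the CFI preamble emitted in front of the program, fp->bpf_func no longer
points at the start of the JIT image, so deriving the header from it this way
goes wrong.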

2023-12-04 12:53:31

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 12:11:28PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 04, 2023 at 10:13:34AM +0100, Peter Zijlstra wrote:
>
> > > Just running test_progs it splats right away:
> > >
> > > [ 74.047757] kmemleak: Found object by alias at 0xffffffffa0001d80
> > > [ 74.048272] CPU: 14 PID: 104 Comm: kworker/14:0 Tainted: G W
> > > O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
> > > [ 74.049118] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > > BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
> > > [ 74.050042] Workqueue: events bpf_prog_free_deferred
> > > [ 74.050448] Call Trace:
> > > [ 74.050663] <TASK>
> > > [ 74.050841] dump_stack_lvl+0x55/0x80
> > > [ 74.051141] __find_and_remove_object+0xdb/0x110
> > > [ 74.051521] kmemleak_free+0x41/0x70
> > > [ 74.051828] vfree+0x36/0x130
> >
> > Durr, I'll see if I can get that stuff running locally, and otherwise
> > play with the robot as you suggested. Thanks!
>
> I think it is bpf_jit_binary_pack_hdr(), which is using prog->bpf_func
> as a start address for the image, instead of jit_data->image.
>
> This used to be true, but now it's offset.
>
> Let me see what to do about that...

Not the prettiest of things, but the below seems to make the thing
happy...

---
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 196cc1481dec..f4357c3211bc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3024,6 +3024,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog->bpf_func = (void *)image + ctx.prog_offset;
prog->jited = 1;
prog->jited_len = proglen - ctx.prog_offset;
+ prog->aux->cfi_offset = ctx.prog_offset;
} else {
prog = orig_prog;
}
@@ -3078,6 +3079,7 @@ void bpf_jit_free(struct bpf_prog *prog)
kvfree(jit_data->addrs);
kfree(jit_data);
}
+ prog->bpf_func = (void *)prog->bpf_func - prog->aux->cfi_offset;
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8b725776e70a..e5fa0852a20f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1483,6 +1483,7 @@ struct bpf_prog_aux {
struct work_struct work;
struct rcu_head rcu;
};
+ u32 cfi_offset;
};

struct bpf_prog {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5c84a935ba63..763742f4740f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -121,6 +121,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
#endif

INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
+#ifdef CONFIG_FINEIBT
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
+#endif
mutex_init(&fp->aux->used_maps_mutex);
mutex_init(&fp->aux->dst_mutex);

@@ -709,6 +712,8 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)

bpf_ksym_del(&fp->aux->ksym);
#ifdef CONFIG_FINEIBT
+ if (cfi_mode != CFI_FINEIBT)
+ return;
bpf_ksym_del(&fp->aux->ksym_prefix);
#endif
}

2023-12-04 17:26:00

by Jiri Olsa

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 01:52:39PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 04, 2023 at 12:11:28PM +0100, Peter Zijlstra wrote:
> > On Mon, Dec 04, 2023 at 10:13:34AM +0100, Peter Zijlstra wrote:
> >
> > > > Just running test_progs it splats right away:
> > > >
> > > > [ 74.047757] kmemleak: Found object by alias at 0xffffffffa0001d80
> > > > [ 74.048272] CPU: 14 PID: 104 Comm: kworker/14:0 Tainted: G W
> > > > O 6.7.0-rc3-00702-g41c30fec304d-dirty #5241
> > > > [ 74.049118] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > > > BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
> > > > [ 74.050042] Workqueue: events bpf_prog_free_deferred
> > > > [ 74.050448] Call Trace:
> > > > [ 74.050663] <TASK>
> > > > [ 74.050841] dump_stack_lvl+0x55/0x80
> > > > [ 74.051141] __find_and_remove_object+0xdb/0x110
> > > > [ 74.051521] kmemleak_free+0x41/0x70
> > > > [ 74.051828] vfree+0x36/0x130
> > >
> > > Durr, I'll see if I can get that stuff running locally, and otherwise
> > > play with the robot as you suggested. Thanks!
> >
> > I think it is bpf_jit_binary_pack_hdr(), which is using prog->bpf_func
> > as a start address for the image, instead of jit_data->image.
> >
> > This used to be true, but now it's offset.
> >
> > Let me see what to do about that...
>
> Not the prettiest of things, but the below seems to make the thing
> happy...
>

hyea,
that boots properly for me but gives the crash below when running bpf tests

jirka


---
[ 482.145182][ T699] RIP: 0010:bpf_for_each_array_elem+0xbb/0x120
[ 482.145672][ T699] Code: 4c 01 f5 89 5c 24 04 4c 89 e7 48 8d 74 24 04 48 89 ea 4c 89 fd 4c 89 f9 45 31 c0 4d 89 eb 41 ba ef 86 cd 67 45 03 53 f1 74 02 <0f> 0b 41 ff d3 0f 1f 00 48 85 c0 75 0e 48 8d 43 01 41 8b 4c 24 24
[ 482.147221][ T699] RSP: 0018:ffffc900017e3e88 EFLAGS: 00010217
[ 482.147702][ T699] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc900017e3ed8
[ 482.152162][ T699] RDX: ffff888152eb0210 RSI: ffffc900017e3e8c RDI: ffff888152eb0000
[ 482.152770][ T699] RBP: ffffc900017e3ed8 R08: 0000000000000000 R09: 0000000000000000
[ 482.153350][ T699] R10: 000000004704ef28 R11: ffffffffa0012774 R12: ffff888152eb0000
[ 482.153951][ T699] R13: ffffffffa0012774 R14: ffff888152eb0210 R15: ffffc900017e3ed8
[ 482.154554][ T699] FS: 00007fa60d4fdd00(0000) GS:ffff88846d200000(0000) knlGS:0000000000000000
[ 482.155138][ T699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 482.155564][ T699] CR2: 00007fa60d7d8000 CR3: 00000001502a2005 CR4: 0000000000770ef0
[ 482.156095][ T699] PKRU: 55555554
[ 482.156349][ T699] Call Trace:
[ 482.156596][ T699] <TASK>
[ 482.156816][ T699] ? __die_body+0x68/0xb0
[ 482.157138][ T699] ? die+0xba/0xe0
[ 482.157456][ T699] ? do_trap+0xa5/0x180
[ 482.157826][ T699] ? bpf_for_each_array_elem+0xbb/0x120
[ 482.158277][ T699] ? bpf_for_each_array_elem+0xbb/0x120
[ 482.158711][ T699] ? do_error_trap+0xc4/0x140
[ 482.159052][ T699] ? bpf_for_each_array_elem+0xbb/0x120
[ 482.159506][ T699] ? handle_invalid_op+0x2c/0x40
[ 482.159906][ T699] ? bpf_for_each_array_elem+0xbb/0x120
[ 482.160990][ T699] ? exc_invalid_op+0x38/0x60
[ 482.161375][ T699] ? asm_exc_invalid_op+0x1a/0x20
[ 482.161788][ T699] ? 0xffffffffa0012774
[ 482.162149][ T699] ? 0xffffffffa0012774
[ 482.162513][ T699] ? bpf_for_each_array_elem+0xbb/0x120
[ 482.162905][ T699] bpf_prog_ca45ea7f9cb8ac1a_inner_map+0x94/0x98
[ 482.163471][ T699] bpf_trampoline_6442549234+0x47/0x1000
[ 482.163981][ T699] __x64_sys_getpgid+0x9/0x20
[ 482.164770][ T699] do_syscall_64+0x53/0x110
[ 482.165184][ T699] entry_SYSCALL_64_after_hwframe+0x6e/0x76
[ 482.165646][ T699] RIP: 0033:0x7fa60d6c5b4d
[ 482.166005][ T699] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b 92 0c 00 f7 d8 64 89 01 48
[ 482.169662][ T699] RSP: 002b:00007ffddfcf99e8 EFLAGS: 00000202 ORIG_RAX: 0000000000000079
[ 482.170582][ T699] RAX: ffffffffffffffda RBX: 00007ffddfcf9d98 RCX: 00007fa60d6c5b4d
[ 482.171376][ T699] RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000557baa5ab850
[ 482.172010][ T699] RBP: 00007ffddfcf9b30 R08: 0000000000000000 R09: 0000000000000000
[ 482.172665][ T699] R10: 0000000000000064 R11: 0000000000000202 R12: 0000000000000001
[ 482.173359][ T699] R13: 0000000000000000 R14: 00007fa60d80d000 R15: 0000557ba6ab9790
[ 482.174014][ T699] </TASK>
[ 482.174289][ T699] Modules linked in: bpf_testmod(OE) intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel kvm_intel rapl iTCO_wdt iTCO_vendor_support i2c_i801 i2c_smbus lpc_ich drm loop drm_panel_orientation_quirks zram
[ 482.176040][ T699] ---[ end trace 0000000000000000 ]---
[ 482.176534][ T699] RIP: 0010:bpf_for_each_array_elem+0xbb/0x120
[ 482.177215][ T699] Code: 4c 01 f5 89 5c 24 04 4c 89 e7 48 8d 74 24 04 48 89 ea 4c 89 fd 4c 89 f9 45 31 c0 4d 89 eb 41 ba ef 86 cd 67 45 03 53 f1 74 02 <0f> 0b 41 ff d3 0f 1f 00 48 85 c0 75 0e 48 8d 43 01 41 8b 4c 24 24
[ 482.179405][ T699] RSP: 0018:ffffc900017e3e88 EFLAGS: 00010217
[ 482.179971][ T699] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc900017e3ed8
[ 482.180615][ T699] RDX: ffff888152eb0210 RSI: ffffc900017e3e8c RDI: ffff888152eb0000
[ 482.181195][ T699] RBP: ffffc900017e3ed8 R08: 0000000000000000 R09: 0000000000000000
[ 482.181805][ T699] R10: 000000004704ef28 R11: ffffffffa0012774 R12: ffff888152eb0000
[ 482.182411][ T699] R13: ffffffffa0012774 R14: ffff888152eb0210 R15: ffffc900017e3ed8
[ 482.183043][ T699] FS: 00007fa60d4fdd00(0000) GS:ffff88846d200000(0000) knlGS:0000000000000000
[ 482.183753][ T699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 482.184361][ T699] CR2: 00007fa60d7d8000 CR3: 00000001502a2005 CR4: 0000000000770ef0
[ 482.185649][ T699] PKRU: 55555554
[ 482.185985][ T699] BUG: sleeping function called from invalid context at include/linux/percpu-rwsem.h:49
[ 482.186846][ T699] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 699, name: test_progs
[ 482.187590][ T699] preempt_count: 0, expected: 0
[ 482.188055][ T699] RCU nest depth: 1, expected: 0
[ 482.188460][ T699] INFO: lockdep is turned off.
[ 482.188861][ T699] CPU: 1 PID: 699 Comm: test_progs Tainted: G D OE 6.7.0-rc3+ #118 bfe8e46ac948d811e03ae3149ad95ea179efe638
[ 482.189821][ T699] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
[ 482.191140][ T699] Call Trace:
[ 482.191469][ T699] <TASK>
[ 482.191766][ T699] dump_stack_lvl+0xbd/0x120
[ 482.192222][ T699] __might_resched+0x24a/0x280
[ 482.192694][ T699] exit_signals+0x31/0x280
[ 482.193123][ T699] do_exit+0x1a6/0xbb0
[ 482.193511][ T699] ? bpf_trampoline_6442549234+0x47/0x1000
[ 482.194025][ T699] make_task_dead+0x94/0x180
[ 482.194469][ T699] rewind_stack_and_make_dead+0x17/0x20
[ 482.194968][ T699] RIP: 0033:0x7fa60d6c5b4d
[ 482.195399][ T699] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b 92 0c 00 f7 d8 64 89 01 48
[ 482.197082][ T699] RSP: 002b:00007ffddfcf99e8 EFLAGS: 00000202 ORIG_RAX: 0000000000000079
[ 482.197887][ T699] RAX: ffffffffffffffda RBX: 00007ffddfcf9d98 RCX: 00007fa60d6c5b4d
[ 482.198604][ T699] RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000557baa5ab850
[ 482.199327][ T699] RBP: 00007ffddfcf9b30 R08: 0000000000000000 R09: 0000000000000000
[ 482.200035][ T699] R10: 0000000000000064 R11: 0000000000000202 R12: 0000000000000001
[ 482.200765][ T699] R13: 0000000000000000 R14: 00007fa60d80d000 R15: 0000557ba6ab9790
[ 482.201499][ T699] </TASK>

2023-12-04 18:17:23

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 06:25:34PM +0100, Jiri Olsa wrote:

> that boots properly for me but gives crash below when running bpf tests

OK, more funnies..

> [ 482.145182][ T699] RIP: 0010:bpf_for_each_array_elem+0xbb/0x120
> [ 482.145672][ T699] Code: 4c 01 f5 89 5c 24 04 4c 89 e7 48 8d 74 24 04 48 89 ea 4c 89 fd 4c 89 f9 45 31 c0 4d 89 eb 41 ba ef 86 cd 67 45 03 53 f1 74 02 <0f> 0b 41 ff d3 0f 1f 00 48 85 c0 75 0e 48 8d 43 01 41 8b 4c 24 24
> [ 482.147221][ T699] RSP: 0018:ffffc900017e3e88 EFLAGS: 00010217
> [ 482.147702][ T699] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc900017e3ed8
> [ 482.152162][ T699] RDX: ffff888152eb0210 RSI: ffffc900017e3e8c RDI: ffff888152eb0000
> [ 482.152770][ T699] RBP: ffffc900017e3ed8 R08: 0000000000000000 R09: 0000000000000000
> [ 482.153350][ T699] R10: 000000004704ef28 R11: ffffffffa0012774 R12: ffff888152eb0000
> [ 482.153951][ T699] R13: ffffffffa0012774 R14: ffff888152eb0210 R15: ffffc900017e3ed8
> [ 482.154554][ T699] FS: 00007fa60d4fdd00(0000) GS:ffff88846d200000(0000) knlGS:0000000000000000
> [ 482.155138][ T699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 482.155564][ T699] CR2: 00007fa60d7d8000 CR3: 00000001502a2005 CR4: 0000000000770ef0
> [ 482.156095][ T699] PKRU: 55555554
> [ 482.156349][ T699] Call Trace:
> [ 482.156596][ T699] <TASK>
> [ 482.156816][ T699] ? __die_body+0x68/0xb0
> [ 482.157138][ T699] ? die+0xba/0xe0
> [ 482.157456][ T699] ? do_trap+0xa5/0x180
> [ 482.157826][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> [ 482.158277][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> [ 482.158711][ T699] ? do_error_trap+0xc4/0x140
> [ 482.159052][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> [ 482.159506][ T699] ? handle_invalid_op+0x2c/0x40
> [ 482.159906][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> [ 482.160990][ T699] ? exc_invalid_op+0x38/0x60
> [ 482.161375][ T699] ? asm_exc_invalid_op+0x1a/0x20
> [ 482.161788][ T699] ? 0xffffffffa0012774
> [ 482.162149][ T699] ? 0xffffffffa0012774
> [ 482.162513][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> [ 482.162905][ T699] bpf_prog_ca45ea7f9cb8ac1a_inner_map+0x94/0x98
> [ 482.163471][ T699] bpf_trampoline_6442549234+0x47/0x1000

Looks like this trips an #UD, I'll go try and figure out what this
bpf_for_each_array_elem() does to cause this. Looks like it has an
indirect call, could be the callback_fn thing has a CFI mis-match.


2023-12-04 18:34:39

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 07:16:14PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 04, 2023 at 06:25:34PM +0100, Jiri Olsa wrote:
>
> > that boots properly for me but gives crash below when running bpf tests
>
> OK, more funnies..
>
> > [ 482.145182][ T699] RIP: 0010:bpf_for_each_array_elem+0xbb/0x120
> > [ 482.145672][ T699] Code: 4c 01 f5 89 5c 24 04 4c 89 e7 48 8d 74 24 04 48 89 ea 4c 89 fd 4c 89 f9 45 31 c0 4d 89 eb 41 ba ef 86 cd 67 45 03 53 f1 74 02 <0f> 0b 41 ff d3 0f 1f 00 48 85 c0 75 0e 48 8d 43 01 41 8b 4c 24 24
> > [ 482.147221][ T699] RSP: 0018:ffffc900017e3e88 EFLAGS: 00010217
> > [ 482.147702][ T699] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc900017e3ed8
> > [ 482.152162][ T699] RDX: ffff888152eb0210 RSI: ffffc900017e3e8c RDI: ffff888152eb0000
> > [ 482.152770][ T699] RBP: ffffc900017e3ed8 R08: 0000000000000000 R09: 0000000000000000
> > [ 482.153350][ T699] R10: 000000004704ef28 R11: ffffffffa0012774 R12: ffff888152eb0000
> > [ 482.153951][ T699] R13: ffffffffa0012774 R14: ffff888152eb0210 R15: ffffc900017e3ed8
> > [ 482.154554][ T699] FS: 00007fa60d4fdd00(0000) GS:ffff88846d200000(0000) knlGS:0000000000000000
> > [ 482.155138][ T699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 482.155564][ T699] CR2: 00007fa60d7d8000 CR3: 00000001502a2005 CR4: 0000000000770ef0
> > [ 482.156095][ T699] PKRU: 55555554
> > [ 482.156349][ T699] Call Trace:
> > [ 482.156596][ T699] <TASK>
> > [ 482.156816][ T699] ? __die_body+0x68/0xb0
> > [ 482.157138][ T699] ? die+0xba/0xe0
> > [ 482.157456][ T699] ? do_trap+0xa5/0x180
> > [ 482.157826][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> > [ 482.158277][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> > [ 482.158711][ T699] ? do_error_trap+0xc4/0x140
> > [ 482.159052][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> > [ 482.159506][ T699] ? handle_invalid_op+0x2c/0x40
> > [ 482.159906][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> > [ 482.160990][ T699] ? exc_invalid_op+0x38/0x60
> > [ 482.161375][ T699] ? asm_exc_invalid_op+0x1a/0x20
> > [ 482.161788][ T699] ? 0xffffffffa0012774
> > [ 482.162149][ T699] ? 0xffffffffa0012774
> > [ 482.162513][ T699] ? bpf_for_each_array_elem+0xbb/0x120
> > [ 482.162905][ T699] bpf_prog_ca45ea7f9cb8ac1a_inner_map+0x94/0x98
> > [ 482.163471][ T699] bpf_trampoline_6442549234+0x47/0x1000
>
> Looks like this trips an #UD, I'll go try and figure out what this
> bpf_for_each_array_elem() does to cause this. Looks like it has an
> indirect call, could be the callback_fn thing has a CFI mis-match.

So afaict this is used through bpf_for_each_map_elem(), where the
argument still is properly callback_fn. However, in the descriptor
bpf_for_each_map_elem_proto the argument gets described as:
ARG_PTR_TO_FUNC, which in turn has a comment like:

ARG_PTR_TO_FUNC, /* pointer to a bpf program function */

Which to me sounds like there is definite type punning involved. The
call in bpf_for_each_array_elem() is a regular C indirect call, which
gets adorned with the kCFI magic.

But I doubt the BPF function that gets used gets the correct matching
bits on.
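
For reference, the pointer type at that call site is, paraphrasing
include/linux/bpf.h:

	typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);

so clang checks the indirect call against the kCFI hash of that type, while
the JITed callback (with the patch above) carries cfi_bpf_hash, i.e. the hash
of __bpf_prog_runX's prototype; the two hashes don't match.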

TL;DR, I think this is a pre-existing problem with kCFI + eBPF and not
caused by my patches.

Could any of you bpf knowledgeable folks please explain me exactly what
gets used as the function pointer in this case? -- I'm not sure I can
follow along well enough to begin looking for a solution at this point
:/

2023-12-04 18:59:35

by Sami Tolvanen

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 4, 2023 at 10:34 AM Peter Zijlstra <[email protected]> wrote:
>
> So afaict this is used through bpf_for_each_map_elem(), where the
> argument still is properly callback_fn. However, in the descriptor
> bpf_for_each_map_elem_proto the argument gets described as:
> ARG_PTR_TO_FUNC, which in turn has a comment like:
>
> ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
>
> Which to me sounds like there is definite type punning involved. The
> call in bpf_for_each_array_elem() is a regular C indirect call, which
> gets adorned with the kCFI magic.
>
> But I doubt the BPF function that gets used gets the correct matching
> bits on.
>
> TL;DR, I think this is a pre-existing problem with kCFI + eBPF and not
> caused by my patches.

It is a pre-existing problem, I ran into the same failures when I
looked into this briefly last year:

https://github.com/ClangBuiltLinux/linux/issues/1727

In addition to bpf_for_each_array_elem, a few other callers also use
the same function pointer type that doesn't match cfi_bpf_hash.

Sami

2023-12-05 01:18:51

by Alexei Starovoitov

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 4, 2023 at 10:34 AM Peter Zijlstra <[email protected]> wrote:
>
>
> TL;DR, I think this is a pre-existing problem with kCFI + eBPF and not
> caused by my patches.

It's an old issue indeed.

To work around it I just did:
+__nocfi
static long bpf_for_each_array_elem(struct bpf_map *map,
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags)

to proceed further.
test_progs passed a few more tests, but then it hit:

[ 13.965472] CFI failure at tcp_set_ca_state+0x51/0xd0 (target:
0xffffffffa02050d6; expected type: 0x3a47ac32)
[ 13.966351] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 13.966752] CPU: 3 PID: 2142 Comm: test_progs Tainted: G
O 6.7.0-rc3-00705-g421defd1bea0-dirty #5246
[ 13.967552] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[ 13.968421] RIP: 0010:tcp_set_ca_state+0x51/0xd0
[ 13.968751] Code: 70 40 ff 84 c0 74 49 48 8b 83 60 07 00 00 4c 8b
58 10 4d 85 db 74 1b 40 0f b6 f5 48 89 df 41 ba ce 53 b8 c5 45 03 53
f1 74 02 <0f> 0b 2e e8 c7 ee 31 00 0f b6 83 90 07 00 00 40 80 e5 1f 24
e0 40
[ 13.975460] Call Trace:
[ 13.975640] <IRQ>
[ 13.975791] ? __die_body+0x68/0xb0
[ 13.976062] ? die+0xa4/0xd0
[ 13.976273] ? do_trap+0xa5/0x180
[ 13.976513] ? tcp_set_ca_state+0x51/0xd0
[ 13.976800] ? do_error_trap+0xb6/0x100
[ 13.977076] ? tcp_set_ca_state+0x51/0xd0
[ 13.977360] ? tcp_set_ca_state+0x51/0xd0
[ 13.977644] ? handle_invalid_op+0x2c/0x40
[ 13.977934] ? tcp_set_ca_state+0x51/0xd0
[ 13.978222] ? exc_invalid_op+0x38/0x60
[ 13.978497] ? asm_exc_invalid_op+0x1a/0x20
[ 13.978798] ? tcp_set_ca_state+0x51/0xd0
[ 13.979087] tcp_v6_syn_recv_sock+0x45c/0x6c0
[ 13.979401] tcp_check_req+0x497/0x590
[ 13.979671] tcp_v6_rcv+0x728/0xce0
[ 13.979923] ? raw6_local_deliver+0x63/0x350
[ 13.980257] ip6_protocol_deliver_rcu+0x2f6/0x560
[ 13.980596] ? ip6_input_finish+0x59/0x140
[ 13.980887] ? NF_HOOK+0x29/0x1d0
[ 13.981136] ip6_input_finish+0xcb/0x140
[ 13.981415] ? __cfi_ip6_input_finish+0x10/0x10
[ 13.981738] NF_HOOK+0x177/0x1d0
[ 13.981970] ? rcu_is_watching+0x10/0x40
[ 13.982279] ? lock_release+0x35/0x2e0
[ 13.982547] ? lock_release+0x35/0x2e0
[ 13.982822] ? NF_HOOK+0x29/0x1d0
[ 13.983064] ? __cfi_ip6_rcv_finish+0x10/0x10
[ 13.983409] NF_HOOK+0x177/0x1d0
[ 13.983664] ? ip6_rcv_core+0x50/0x6c0
[ 13.983956] ? process_backlog+0x132/0x290
[ 13.984264] ? process_backlog+0x132/0x290
[ 13.984557] __netif_receive_skb+0x5c/0x160
[ 13.984856] process_backlog+0x19e/0x290
[ 13.985140] __napi_poll+0x3f/0x1f0
[ 13.985402] net_rx_action+0x193/0x330
[ 13.985672] __do_softirq+0x14d/0x3ea
[ 13.985963] ? do_softirq+0x7f/0xb0
[ 13.986243] ? __dev_queue_xmit+0x5b/0xd50
[ 13.986563] ? ip6_finish_output2+0x222/0x7a0
[ 13.986906] do_softirq+0x7f/0xb0

The stack trace doesn't have any bpf, but it's a bpf issue too.
Here tcp_set_ca_state() calls
icsk->icsk_ca_ops->set_state(sk, ca_state);
which calls bpf prog via bpf trampoline.

re: bpf_jit_binary_pack_hdr().

since cfi_mode is __ro_after_init we don't need to waste a
cfi_offset variable in prog->aux and in jit_context.

How about
+int get_cfi_offset(void)
+{
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ return 16;
+ case CFI_KCFI:
+#ifdef CONFIG_CALL_PADDING
+ return 16;
+#else
+ return 5;
+#endif
+ default:
+ return 0;
+ }
+}
+
struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
{
- unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long real_start = (unsigned long)fp->bpf_func -
get_cfi_offset();

and have __weak version of get_cfi_offset() in bpf/core.c
that returns 0 and non-weak in arch/x86 like above?
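
The generic __weak stub being suggested would presumably be as small as
(hypothetical sketch):

/* kernel/bpf/core.c -- generic default, overridden by the x86 version above */
int __weak get_cfi_offset(void)
{
	return 0;
}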

Similarly remove prog_offset from jit_context and undo:

ctx->prog_offset = emit_prologue(...)
to keep it as 'static void emit_prologue'

since cfi offset is fixed at early boot and the same for all progs.

Separately we need to deal with bpf_for_each_array_elem()
which doesn't look easy.
And fix tcp_set_ca_state() as well (which is even harder).

Just to see where places like these are I did:
+__nocfi
BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
+__nocfi
static long bpf_for_each_hash_elem(struct bpf_map *map,
bpf_callback_t callback_fn,
+__nocfi
static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
+__nocfi
static int __bpf_rbtree_add(struct bpf_rb_root *root,
+__nocfi
BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
+__nocfi
void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
+__nocfi
void tcp_init_congestion_control(struct sock *sk)
+__nocfi
void tcp_enter_loss(struct sock *sk)
+__nocfi
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__nocfi
static inline void tcp_in_ack_event(struct sock *sk, u32 flags)

and more... Which is clearly not a direction to go.

Instead of annotating callers is there a way to say that
all bpf_callback_t calls are nocfi?

I feel the patches scratched the iceberg.

2023-12-06 15:36:34

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 05:18:31PM -0800, Alexei Starovoitov wrote:

> How about

> +int get_cfi_offset(void)
> +{
> + switch (cfi_mode) {
> + case CFI_FINEIBT:
> + return 16;
> + case CFI_KCFI:
> +#ifdef CONFIG_CALL_PADDING
> + return 16;
> +#else
> + return 5;
> +#endif
> + default:
> + return 0;
> + }
> +}

Yeah, that works. I'll go make it happen.

> Separately we need to deal with bpf_for_each_array_elem()
> which doesn't look easy.
> And fix tcp_set_ca_state() as well (which is even harder).
>
> Just to see where places like these are I did:
> +__nocfi
> BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
> +__nocfi
> static long bpf_for_each_hash_elem(struct bpf_map *map,
> bpf_callback_t callback_fn,
> +__nocfi
> static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
> +__nocfi
> static int __bpf_rbtree_add(struct bpf_rb_root *root,
> +__nocfi
> BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
> +__nocfi
> void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
> +__nocfi
> void tcp_init_congestion_control(struct sock *sk)
> +__nocfi
> void tcp_enter_loss(struct sock *sk)
> +__nocfi
> static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
> +__nocfi
> static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
>
> and more... Which is clearly not a direction to go.
>
> Instead of annotating callers is there a way to say that
> all bpf_callback_t calls are nocfi?

Well, ideally they would all actually use CFI, I'll go figure out how
all this works and think about it. Thanks!

> I feel the patches scratched the iceberg.

Yeah, clearly :/ I'll go stare at it all.

2023-12-06 16:39:05

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Mon, Dec 04, 2023 at 05:18:31PM -0800, Alexei Starovoitov wrote:

> [ 13.978497] ? asm_exc_invalid_op+0x1a/0x20
> [ 13.978798] ? tcp_set_ca_state+0x51/0xd0
> [ 13.979087] tcp_v6_syn_recv_sock+0x45c/0x6c0
> [ 13.979401] tcp_check_req+0x497/0x590

> The stack trace doesn't have any bpf, but it's a bpf issue too.
> Here tcp_set_ca_state() calls
> icsk->icsk_ca_ops->set_state(sk, ca_state);
> which calls bpf prog via bpf trampoline.



Specifically, I think this is
tools/testing/selftests/bpf/progs/bpf_cubic.c, which has:

.set_state = (void *)bpf_cubic_state,

which comes from:

BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 *new_state)

which then wraps:

BPF_PROG()

which ends up generating:

static __always_inline void ____bpf_cubic_state(unsigned long long *ctx, struct sock *sk, __u8 *new_state)
{
...
}

void bpf_cubic_state(unsigned long long *ctx)
{
return ____bpf_cubic_state(ctx, ctx[0], ctx[1]);
}


I think this then uses arch_prepare_bpf_trampoline(), but I'm entirely
lost how this all comes together, because the way I understand it the
whole bpf_trampoline is used to hook into an ftrace __fentry hook.

And a __fentry hook is very much not a function pointer. Help!?!?


The other case:

For tools/testing/selftests/bpf/progs/bloom_filter_bench.c we have:

bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);

and here bloom callback appears like a normal function:

static __u64
bloom_callback(struct bpf_map *map, __u32 *key, void *val,
struct callback_ctx *data)


But what do functions looks like in the JIT? What's the actual address
that's then passed into the helper function. Given this seems to work
without kCFI, it should at least have an ENDBR, but there's only 3 of
those afaict:

- emit_prologue() first insn
- emit_prologue() tail-call site
- arch_preprare_bpf_trampoline()

If the function passed to the helper is from do_jit()/emit_prologue(),
then how do I tell what 'function' is being JIT'ed ?

If it is arch_prepare_bpf_trampoline(), then we're back at the previous
question and I don't see how a __fentry site becomes a callable function
pointer.


Any clues would be much appreciated.

2023-12-06 18:38:10

by Peter Zijlstra

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Wed, Dec 06, 2023 at 05:38:14PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 04, 2023 at 05:18:31PM -0800, Alexei Starovoitov wrote:
>
> > [ 13.978497] ? asm_exc_invalid_op+0x1a/0x20
> > [ 13.978798] ? tcp_set_ca_state+0x51/0xd0
> > [ 13.979087] tcp_v6_syn_recv_sock+0x45c/0x6c0
> > [ 13.979401] tcp_check_req+0x497/0x590
>
> > The stack trace doesn't have any bpf, but it's a bpf issue too.
> > Here tcp_set_ca_state() calls
> > icsk->icsk_ca_ops->set_state(sk, ca_state);
> > which calls bpf prog via bpf trampoline.
>
>
>
> Specifically, I think this is
> tools/testing/selftests/bpf/progs/bpf_cubic.c, which has:
>
> .set_state = (void *)bpf_cubic_state,
>
> which comes from:
>
> BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 *new_state)
>
> which then wraps:
>
> BPF_PROG()
>
> which ends up generating:
>
> static __always_inline void ____bpf_cubic_state(unsigned long long *ctx, struct sock *sk, __u8 *new_state)
> {
> ...
> }
>
> void bpf_cubic_state(unsigned long long *ctx)
> {
> return ____bpf_cubic_state(ctx, ctx[0], ctx[1]);
> }
>
>
> I think this then uses arch_prepare_bpf_trampoline(), but I'm entirely
> lost how this all comes together, because the way I understand it the
> whole bpf_trampoline is used to hook into an ftrace __fentry hook.
>
> And a __fentry hook is very much not a function pointer. Help!?!?

kernel/bpf/bpf_struct_ops.c:bpf_struct_ops_prepare_trampoline()

And yeah, it seems to use the ftrace trampoline for indirect calls here,
*sigh*.

> The other case:
>
> For tools/testing/selftests/bpf/progs/bloom_filter_bench.c we have:
>
> bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
>
> and here bloom callback appears like a normal function:
>
> static __u64
> bloom_callback(struct bpf_map *map, __u32 *key, void *val,
> struct callback_ctx *data)
>
>
> But what do functions looks like in the JIT? What's the actual address
> that's then passed into the helper function. Given this seems to work
> without kCFI, it should at least have an ENDBR, but there's only 3 of
> those afaict:
>
> - emit_prologue() first insn
> - emit_prologue() tail-call site
> - arch_prepare_bpf_trampoline()
>
> If the function passed to the helper is from do_jit()/emit_prologue(),
> then how do I tell what 'function' is being JIT'ed ?
>
> If it is arch_prepare_bpf_trampoline(), then we're back at the previous
> question and I don't see how a __fentry site becomes a callable function
> pointer.
>
>
> Any clues would be much appreciated.

Still not figured out how this one works...

2023-12-06 21:39:57

by Alexei Starovoitov

Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Wed, Dec 06, 2023 at 07:37:13PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 06, 2023 at 05:38:14PM +0100, Peter Zijlstra wrote:
> > On Mon, Dec 04, 2023 at 05:18:31PM -0800, Alexei Starovoitov wrote:
> >
> > > [ 13.978497] ? asm_exc_invalid_op+0x1a/0x20
> > > [ 13.978798] ? tcp_set_ca_state+0x51/0xd0
> > > [ 13.979087] tcp_v6_syn_recv_sock+0x45c/0x6c0
> > > [ 13.979401] tcp_check_req+0x497/0x590
> >
> > > The stack trace doesn't have any bpf, but it's a bpf issue too.
> > > Here tcp_set_ca_state() calls
> > > icsk->icsk_ca_ops->set_state(sk, ca_state);
> > > which calls bpf prog via bpf trampoline.
> >
> >
> >
> > Specifically, I think this is
> > tools/testing/selftests/bpf/progs/bpf_cubic.c, which has:
> >
> > .set_state = (void *)bpf_cubic_state,
> >
> > which comes from:
> >
> > BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 *new_state)
> >
> > which then wraps:
> >
> > BPF_PROG()
> >
> > which ends up generating:
> >
> > static __always_inline void ____bpf_cubic_state(unsigned long long *ctx, struct sock *sk, __u8 *new_state)
> > {
> > ...
> > }
> >
> > void bpf_cubic_state(unsigned long long *ctx)
> > {
> > return ____bpf_cubic_state(ctx, ctx[0], ctx[1]);
> > }

Yep. That's correct.

> > I think this then uses arch_prepare_bpf_trampoline(), but I'm entirely
> > lost how this all comes together, because the way I understand it the
> > whole bpf_trampoline is used to hook into an ftrace __fentry hook.
> >
> > And a __fentry hook is very much not a function pointer. Help!?!?
>
> kernel/bpf/bpf_struct_ops.c:bpf_struct_ops_prepare_trampoline()
>
> And yeah, it seems to use the ftrace trampoline for indirect calls here,
> *sigh*.

Not quite.
bpf_struct_ops_prepare_trampoline() prepares a trampoline that does conversion
from native calling convention to bpf calling convention.
We could have optimized it for the case of x86-64 and num_args <= 5 and it would
be a nop trampoline, but so far it's generic and works on x86-64, arm64, etc.
There were patches posted to make it work on 32-bit archs too (not landed yet).
All native args are stored one by one into u64 ctx[0], u64 ctx[1], on the stack
and then the bpf prog is called with a single ctx pointer.
For example for the case of
struct tcp_congestion_ops {
void (*set_state)(struct sock *sk, u8 new_state);
}
The translation of 'sk' into ctx[0] is based on 'struct btf_func_model' which
is discovered from 'set_state' func prototype as stored in BTF.
So the trampoline for set_state copies %rdi into ctx[0] and %rsi into ctx[1]
and _directly_ calls bpf_cubic_state().
Note arch_prepare_bpf_trampoline() emits ENDBR as the first insn,
because the pointer to this trampoline is directly stored in 'struct tcp_congestion_ops'.
Later from TCP stack point of view 'icsk_ca_ops' are exactly the same for
built-in cong controls (CCs), kernel module's CCs and bpf-based CCs.
All calls to struct tcp_congestion_ops callbacks are normal indirect calls.
Different CCs have different struct tcp_congestion_ops with their own
pointers to functions, of course.
There is no ftrace here at all. No .text live patching either.
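
A minimal C-level sketch of what that generated trampoline amounts to
(illustrative only; set_state_trampoline is a made-up name and the real
thing is emitted as machine code by arch_prepare_bpf_trampoline()):

	typedef unsigned long long u64;

	struct sock;					/* opaque for the sketch */

	/* the JITed BPF prog: bpf calling convention, one ctx pointer */
	extern u64 bpf_cubic_state(u64 *ctx);

	/* native calling convention in, bpf calling convention out */
	static void set_state_trampoline(struct sock *sk, unsigned char new_state)
	{
		u64 ctx[2];

		ctx[0] = (u64)(unsigned long)sk;	/* %rdi -> ctx[0] */
		ctx[1] = new_state;			/* %rsi -> ctx[1] */

		bpf_cubic_state(ctx);			/* direct call, no ftrace */
	}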

All is ok until kCFI comes into picture.
Here we probably need to teach arch_prepare_bpf_trampoline() to emit
different __kcfi_typeid depending on kernel function proto,
so that caller hash checking logic won't be tripped.
I suspect that requires to reverse engineer an algorithm of computing kcfi from clang.
other ideas?

> > The other case:
> >
> > For tools/testing/selftests/bpf/progs/bloom_filter_bench.c we have:
> >
> > bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
> >
> > and here bloom callback appears like a normal function:
> >
> > static __u64
> > bloom_callback(struct bpf_map *map, __u32 *key, void *val,
> > struct callback_ctx *data)
> >
> >
> > But what do functions looks like in the JIT? What's the actual address
> > that's then passed into the helper function. Given this seems to work
> > without kCFI, it should at least have an ENDBR, but there's only 3 of
> > those afaict:

Right. That is very different from struct_ops/trampoline.
There is no trampoline here at all.
A bpf prog is JITed as normal, but its prototype is technically bpf_callback_t.
We do the same JITing for all progs. Both main entry prog and subprograms.
They all are treated as accepting 5 u64 args and returning single u64.
For the main prog the prototype:
bpf_prog_runX(u64 *regs, const struct bpf_insn *insn)
is partially true, since 2nd arg is never used and the 1st arg is 'ctx'.
So from x86-64 JIT pov there is no difference whether it's single 8-byte arg
or five 8-byte args.

In the case of bpf_for_each_map_elem() the 'bloom_callback' is a subprog
of bpf_callback_t type.
So the kernel is doing:
ret = callback_fn((u64)(long)map, (u64)(long)&key,
(u64)(long)val, (u64)(long)callback_ctx, 0);
and that works on all archs including 32-bit.
The kernel is doing conversion from native calling convention to bpf calling convention
and for lucky archs like x86-64 the conversion is a true nop.
It's a plain indirect call to JITed bpf prog.
Note there is no interpreter support here. This works on archs with JITs only.
No ftrace and no trampoline.

This case is easier to make work with kCFI.
The JIT will use:
cfi_bpf_hash:
.long __kcfi_typeid___bpf_prog_runX
like your patch already does.
And will use
extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
cfi_bpf_subprog_hash:
.long __kcfi_typeid___bpf_callback_fn
to JIT all subprogs. See bpf_is_subprog().
Which shouldn't be too difficult to add.
We'd need to tweak the verifier to make sure bpf_for_each_map_elem and friends
never call back into the main subprog, which is technically the case today.
Just need to add more guardrails.
I can work on this.

btw there are two patchsets in progress that will touch core bits of JITs.
This one:
https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/
and this one:
https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/

so do you mind resending your current set with get_cfi_offset() change and
I can land it into bpf-next, so we can fix one bug at a time,
build on top, and avoid conflicts?
The more we dig the more it looks like that the follow up you planned to do
on top of this set isn't going to happen soon.
So should be ok going through bpf-next and then you can follow up with x86 things
after merge window?

2023-12-07 09:34:18

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Wed, Dec 06, 2023 at 01:39:43PM -0800, Alexei Starovoitov wrote:


> All is ok until kCFI comes into picture.
> Here we probably need to teach arch_prepare_bpf_trampoline() to emit
> different __kcfi_typeid depending on kernel function proto,
> so that caller hash checking logic won't be tripped.
> I suspect that requires to reverse engineer an algorithm of computing kcfi from clang.
> other ideas?

I was going to try and extend bpf_struct_ops with a pointer, this
pointer will point to a struct of the right type with all ops filled out
as stubs.

Then I was going to have bpf_struct_ops_map_update_elem() pass a pointer
to the stub op (using moff) into bpf_struct_ops_prepare_trampoline() and
eventually arch_prepare_bpf_trampoline().

Additionally I was going to add BPF_TRAMP_F_INDIRECT.

Then when F_INDIRECT is set, have it generate the CFI preamble based on
the stub passed -- which will have the correct preamble for that method.

At least, that's what I'm thinking now, I've yet to try and implement
it.

> > > The other case:

> In the case of bpf_for_each_map_elem() the 'bloom_callback' is a subprog
> of bpf_callback_t type.
> So the kernel is doing:
> ret = callback_fn((u64)(long)map, (u64)(long)&key,
> (u64)(long)val, (u64)(long)callback_ctx, 0);
> and that works on all archs including 32-bit.
> The kernel is doing conversion from native calling convention to bpf calling convention
> and for lucky archs like x86-64 the conversion is a true nop.
> It's a plain indirect call to JITed bpf prog.
> Note there is no interpreter support here. This works on archs with JITs only.
> No ftrace and no trampoline.
>
> This case is easier to make work with kCFI.
> The JIT will use:
> cfi_bpf_hash:
> .long __kcfi_typeid___bpf_prog_runX
> like your patch already does.
> And will use
> extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
> cfi_bpf_subprog_hash:
> .long __kcfi_typeid___bpf_callback_fn
> to JIT all subprogs. See bpf_is_subprog().

Aaah! Yes, it should be trivial to use another hash value when
is_subprog is set in emit_prologue().

> btw there are two patchsets in progress that will touch core bits of JITs.
> This one:
> https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/
> and this one:
> https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/
>
> so do you mind resending your current set with get_cfi_offset() change and
> I can land it into bpf-next, so we can fix one bug at a time,
> build on top, and avoid conflicts?

I can do.

> The more we dig the more it looks like that the follow up you planned to do
> on top of this set isn't going to happen soon.
> So should be ok going through bpf-next and then you can follow up with x86 things
> after merge window?

Yes, we can do that. Plans have changed on my side too -- I'm taking a 6
week break soon, so I'll do whatever I can before I'm out, and then
continue from whatever state I find when I get back.


Thanks for the details!

2023-12-07 22:32:33

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Thu, Dec 07, 2023 at 10:31:05AM +0100, Peter Zijlstra wrote:
> On Wed, Dec 06, 2023 at 01:39:43PM -0800, Alexei Starovoitov wrote:
>
>
> > All is ok until kCFI comes into picture.
> > Here we probably need to teach arch_prepare_bpf_trampoline() to emit
> > different __kcfi_typeid depending on kernel function proto,
> > so that caller hash checking logic won't be tripped.
> > I suspect that requires to reverse engineer an algorithm of computing kcfi from clang.
> > other ideas?
>
> I was going to try and extend bpf_struct_ops with a pointer, this
> pointer will point to a struct of the right type with all ops filled out
> as stubs.

Right. Something like this, but it's more nuanced.

The bpf_struct_ops concept is a generic mechanism to provide bpf-based callback
to any set of kernel callbacks.

bpf tcp CC plugs into:
struct tcp_congestion_ops {
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);

/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);

/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
...
};

and from bpf side we don't touch tcp kernel bits at all.
tcp stack doesn't know whether it's calling bpf based CC or builtin CC or CC provided by kernel module.

The bpf struct_ops mechanism is a zero-cost extension to potentially any kernel mechanism
that is already set up with callbacks. tcp_congestion_ops is one of them.

The allowlisting of tcp_congestion_ops for bpf use is done in net/ipv4/bpf_tcp_ca.c via:

struct bpf_struct_ops bpf_tcp_congestion_ops = {
...
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
...
.name = "tcp_congestion_ops",
};
static int bpf_tcp_ca_reg(void *kdata)
{
return tcp_register_congestion_control(kdata);
}
and
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
is a normal TCP CC registration routine that is used by all CCs.

The bpf struct_ops infra prepares everything inside
'struct tcp_congestion_ops' that makes it indistinguishable from normal kernel CC,
except kCFI part. sadly.

The kCFI challenge is that clang may not generate any __cfi + __kcfi_typeid at all.
Like if vmlinux doesn't have any TCP CCs built-in there will be no kCFI hashes
in the kernel that represent a required hash to call cong_avoid, set_state, cwnd_event.

At the callsite like in net/ipv4/tcp_input.c
icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
the clang will compute the kcfi hash, but there will be no __cfi symbol in vmlinux.

If there was one we could teach the verifier to look for __kcfi...cong_avoid
in kallsyms, then read cfi hash from there and populate it into generated asm
in arch_prepare_bpf_trampoline.

So I'm thinking to do this:

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index c7bbd8f3c708..afaadc2c0827 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -283,6 +283,12 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.name = "tcp_congestion_ops",
};

+/* never accessed, here for clang kCFI only */
+extern void __kcfi_cong_avoid(struct sock *sk, u32 ack, u32 acked);
+__ADDRESSABLE(__kcfi_cong_avoid);
+extern void __kcfi_set_state(struct sock *sk, u8 new_state);
+__ADDRESSABLE(__kcfi_set_state);

To force kcfi generation, and then teach the struct_ops infra to look
for __kcfi_typeid___kcfi_##callbackname in kallsyms,
read the kcfi hash from there and populate it into the bpf trampoline.

Since kcfi and bpf are not working well together, I believe it's the bpf folks' job to fix it,
especially since it's starting to look bpf-infra heavy.

If you're interested I can, of course, point to relevant bits in kernel/bpf/bpf_struct_ops.c
that would need to be extended to support such kcfi_typeid search,
but I think it's my job to fix it.

If I get stuck, I'll ask for help.

I also researched a different approach.
llvm does the following to compute the kcfi hash:

llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
if (auto *FnType = T->getAs<FunctionProtoType>())
T = getContext().getFunctionType(
FnType->getReturnType(), FnType->getParamTypes(),
FnType->getExtProtoInfo().withExceptionSpec(EST_None));

std::string OutName;
llvm::raw_string_ostream Out(OutName);
getCXXABI().getMangleContext().mangleCanonicalTypeName(
T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers);

if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
Out << ".normalized";

return llvm::ConstantInt::get(Int32Ty,
static_cast<uint32_t>(llvm::xxHash64(OutName)));
}

xxhash is already available in the kernel.
We can add the type mangling logic and convert the prototypes of cong_avoid, set_state, etc.
(which are already available in vmlinux BTF) into a mangled string and
apply xxhash to that string.
This way we wouldn't need to add __kcfi stubs to net/ipv4/bpf_tcp_ca.c.
The kcfi hash will be computed on demand.
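
A minimal sketch of that last step, assuming a mangled type string has
already been produced (the mangling itself is the hard part and is not
shown; kcfi_hash_from_proto is a made-up name):

	#include <linux/types.h>
	#include <linux/string.h>
	#include <linux/xxhash.h>

	/* clang's kCFI hash is xxHash64 of the mangled canonical function
	 * type, truncated to 32 bits */
	static u32 kcfi_hash_from_proto(const char *mangled_type)
	{
		return (u32)xxh64(mangled_type, strlen(mangled_type), 0);
	}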

> >
> > This case is easier to make work with kCFI.
> > The JIT will use:
> > cfi_bpf_hash:
> > .long __kcfi_typeid___bpf_prog_runX
> > like your patch already does.
> > And will use
> > extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
> > cfi_bpf_subprog_hash:
> > .long __kcfi_typeid___bpf_callback_fn
> > to JIT all subprogs. See bpf_is_subprog().
>
> Aaah!, yes it should be trivial to use another hash value when
> is_subprog in emit_prologue().

Great. I'll wait for your respin and then will start building "kcfi for struct-ops"
via one of the two approaches above.

> Yes, we can do that. Plans have changed on my side too -- I'm taking a 6
> week break soon, so I'll do whatever I can before I'm out, and then
> continue from whatever state I find when I get back.

6 weeks! Nice. Enjoy the long break.
Last time I took 3 week PTO in a row I got bored after week 2 and went back to hacking :)

2023-12-08 10:39:17

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Thu, Dec 07, 2023 at 02:32:12PM -0800, Alexei Starovoitov wrote:
> On Thu, Dec 07, 2023 at 10:31:05AM +0100, Peter Zijlstra wrote:
> > On Wed, Dec 06, 2023 at 01:39:43PM -0800, Alexei Starovoitov wrote:
> >
> >
> > > All is ok until kCFI comes into picture.
> > > Here we probably need to teach arch_prepare_bpf_trampoline() to emit
> > > different __kcfi_typeid depending on kernel function proto,
> > > so that caller hash checking logic won't be tripped.
> > > I suspect that requires to reverse engineer an algorithm of computing kcfi from clang.
> > > other ideas?
> >
> > I was going to try and extend bpf_struct_ops with a pointer, this
> > pointer will point to a struct of the right type with all ops filled out
> > as stubs.
>
> Right. Something like this, but it's more nuanced.
>
> The bpf_struct_ops concept is a generic mechanism to provide bpf-based callback
> to any set of kernel callbacks.
>
> bpf tcp CC plugs into:
> struct tcp_congestion_ops {
> /* do new cwnd calculation (required) */
> void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
>
> /* call before changing ca_state (optional) */
> void (*set_state)(struct sock *sk, u8 new_state);
>
> /* call when cwnd event occurs (optional) */
> void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
> ...
> };
>
> and from bpf side we don't touch tcp kernel bits at all.
> tcp stack doesn't know whether it's calling bpf based CC or builtin CC or CC provided by kernel module.
>
> bpf struct_ops mechanim is a zero cost extension to potentially any kernel mechanism
> that is already setup with callbacks. tcp_congestion_ops is one of them.
>
> The allowlisting of tcp_congestion_ops for bpf use is done in net/ipv4/bpf_tcp_ca.c via:
>
> struct bpf_struct_ops bpf_tcp_congestion_ops = {
> ...
> .reg = bpf_tcp_ca_reg,
> .unreg = bpf_tcp_ca_unreg,
> ...
> .name = "tcp_congestion_ops",
> };
> static int bpf_tcp_ca_reg(void *kdata)
> {
> return tcp_register_congestion_control(kdata);
> }
> and
> int tcp_register_congestion_control(struct tcp_congestion_ops *type);
> is a normal TCP CC registration routine that is used by all CCs.
>
> The bpf struct_ops infra prepares everything inside
> 'struct tcp_congestion_ops' that makes it indistinguishable from normal kernel CC,
> except kCFI part. sadly.
>
> The kCFI challenge is that clang may not generate any __cfi + __kcfi_typeid at all.
> Like if vmlinux doesn't have any TCP CCs built-in there will be no kCFI hashes
> in the kernel that represent a required hash to call cong_avoid, set_state, cwnd_event.

Right, I got that. So what I meant was something like the below
(compiled only).

By adding an actual ops struct, filled with no-op stubs, to
bpf_struct_ops, we can crib the CFI hash from those functions. They'll
never be called, but the compiler cannot tell and has to generate them.

The only problem I now have is the one XXX, I'm not entirely sure what
signature to use there.

---
Index: linux-2.6/include/linux/bpf.h
===================================================================
--- linux-2.6.orig/include/linux/bpf.h
+++ linux-2.6/include/linux/bpf.h
@@ -1060,6 +1060,17 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)

+/*
+ * Indicate the trampoline should be suitable to receive indirect calls;
+ * without this indirectly calling the generated code can result in #UD/#CP,
+ * depending on the CFI options.
+ *
+ * Used by bpf_struct_ops.
+ *
+ * Incompatible with FENTRY usage, overloads @func_addr argument.
+ */
+#define BPF_TRAMP_F_INDIRECT BIT(8)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -1695,6 +1706,7 @@ struct bpf_struct_ops {
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
u32 type_id;
u32 value_id;
+ void *bpf_ops_stubs;
};

#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
@@ -1708,6 +1720,7 @@ int bpf_struct_ops_map_sys_lookup_elem(s
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
+ void *stub_func,
void *image, void *image_end);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
Index: linux-2.6/kernel/bpf/bpf_struct_ops.c
===================================================================
--- linux-2.6.orig/kernel/bpf/bpf_struct_ops.c
+++ linux-2.6/kernel/bpf/bpf_struct_ops.c
@@ -352,17 +352,16 @@ const struct bpf_link_ops bpf_struct_ops
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *image, void *image_end)
+ void *stub_func, void *image, void *image_end)
{
- u32 flags;
+ u32 flags = BPF_TRAMP_F_INDIRECT;
int size;

tlinks[BPF_TRAMP_FENTRY].links[0] = link;
tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
- /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
- * and it must be used alone.
- */
- flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
+
+ if (model->ret_size > 0)
+ flags |= BPF_TRAMP_F_RET_FENTRY_RET;

size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
if (size < 0)
@@ -370,7 +369,7 @@ int bpf_struct_ops_prepare_trampoline(st
if (size > (unsigned long)image_end - (unsigned long)image)
return -E2BIG;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
- model, flags, tlinks, NULL);
+ model, flags, tlinks, stub_func);
}

static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -504,11 +503,12 @@ static long bpf_struct_ops_map_update_el

err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[i],
+ *(void **)(st_ops->bpf_ops_stubs + moff),
image, image_end);
if (err < 0)
goto reset_unlock;

- *(void **)(kdata + moff) = image;
+ *(void **)(kdata + moff) = image + cfi_get_offset();
image += err;

/* put prog_id to udata */
Index: linux-2.6/net/ipv4/bpf_tcp_ca.c
===================================================================
--- linux-2.6.orig/net/ipv4/bpf_tcp_ca.c
+++ linux-2.6/net/ipv4/bpf_tcp_ca.c
@@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kda
return tcp_validate_congestion_control(kdata);
}

+static u32 bpf_tcp_ca_ssthresh(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+}
+
+static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state)
+{
+}
+
+static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+}
+
+static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
+{
+}
+
+static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+{
+}
+
+static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+{
+}
+
+static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk)
+{
+ return 0;
+}
+
+static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk)
+{
+ return 0;
+}
+
+static void __bpf_tcp_ca_init(struct sock *sk)
+{
+}
+
+static void __bpf_tcp_ca_release(struct sock *sk)
+{
+}
+
+static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+ .ssthresh = bpf_tcp_ca_ssthresh,
+ .cong_avoid = bpf_tcp_ca_cong_avoid,
+ .set_state = bpf_tcp_ca_set_state,
+ .cwnd_event = bpf_tcp_ca_cwnd_event,
+ .in_ack_event = bpf_tcp_ca_in_ack_event,
+ .pkts_acked = bpf_tcp_ca_pkts_acked,
+ .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .cong_control = bpf_tcp_ca_cong_control,
+ .undo_cwnd = bpf_tcp_ca_undo_cwnd,
+ .sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+
+ .init = __bpf_tcp_ca_init,
+ .release = __bpf_tcp_ca_release,
+};
+
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
@@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
+ .bpf_ops_stubs = &__bpf_ops_tcp_congestion_ops,
};

static int __init bpf_tcp_ca_kfunc_init(void)
Index: linux-2.6/arch/x86/include/asm/cfi.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/cfi.h
+++ linux-2.6/arch/x86/include/asm/cfi.h
@@ -123,6 +123,8 @@ static inline cfi_get_offset(void)
}
#define cfi_get_offset cfi_get_offset

+extern u32 cfi_get_func_hash(void *func);
+
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
@@ -130,6 +132,10 @@ static inline enum bug_trap_type handle_
}
#define cfi_bpf_hash 0U
#define cfi_bpf_subprog_hash 0U
+static inline u32 cfi_get_func_hash(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */

#endif /* _ASM_X86_CFI_H */
Index: linux-2.6/arch/x86/kernel/alternative.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/alternative.c
+++ linux-2.6/arch/x86/kernel/alternative.c
@@ -883,6 +883,28 @@ asm (
" .size cfi_bpf_subprog_hash, 4 \n"
" .popsection \n"
);
+
+u32 cfi_get_func_hash(void *func)
+{
+ u32 hash;
+
+ func -= cfi_get_offset();
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ func += 7;
+ break;
+ case CFI_KCFI:
+ func += 1;
+ break;
+ default:
+ return 0;
+ }
+
+ if (get_kernel_nofault(hash, func))
+ return 0;
+
+ return hash;
+}
#endif

#ifdef CONFIG_FINEIBT
Index: linux-2.6/net/bpf/bpf_dummy_struct_ops.c
===================================================================
--- linux-2.6.orig/net/bpf/bpf_dummy_struct_ops.c
+++ linux-2.6/net/bpf/bpf_dummy_struct_ops.c
@@ -62,7 +62,7 @@ static int dummy_ops_copy_args(struct bp

static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
{
- dummy_ops_test_ret_fn test = (void *)image;
+ dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset();
struct bpf_dummy_ops_state *state = NULL;

/* state needs to be NULL if args[0] is 0 */
@@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
op_idx = prog->expected_attach_type;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
+ /* XXX */ NULL,
image, image + PAGE_SIZE);
if (err < 0)
goto out;
@@ -219,6 +220,28 @@ static void bpf_dummy_unreg(void *kdata)
{
}

+static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
+ .test_1 = bpf_dummy_test_1,
+ .test_2 = bpf_dummy_test_2,
+ .test_sleepable = bpf_dummy_test_sleepable,
+};
+
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
@@ -227,4 +250,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
+ .bpf_ops_stubs = &__bpf_bpf_dummy_ops,
};
Index: linux-2.6/arch/x86/net/bpf_jit_comp.c
===================================================================
--- linux-2.6.orig/arch/x86/net/bpf_jit_comp.c
+++ linux-2.6/arch/x86/net/bpf_jit_comp.c
@@ -312,9 +312,8 @@ static void pop_callee_regs(u8 **pprog,
* in arch/x86/kernel/alternative.c
*/

-static void emit_fineibt(u8 **pprog, bool is_subprog)
+static void emit_fineibt(u8 **pprog, u32 hash)
{
- u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash;
u8 *prog = *pprog;

EMIT_ENDBR();
@@ -327,9 +326,8 @@ static void emit_fineibt(u8 **pprog, boo
*pprog = prog;
}

-static void emit_kcfi(u8 **pprog, bool is_subprog)
+static void emit_kcfi(u8 **pprog, u32 hash)
{
- u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash;
u8 *prog = *pprog;

EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
@@ -351,17 +349,17 @@ static void emit_kcfi(u8 **pprog, bool i
*pprog = prog;
}

-static void emit_cfi(u8 **pprog, bool is_subprog)
+static void emit_cfi(u8 **pprog, u32 hash)
{
u8 *prog = *pprog;

switch (cfi_mode) {
case CFI_FINEIBT:
- emit_fineibt(&prog, is_subprog);
+ emit_fineibt(&prog, hash);
break;

case CFI_KCFI:
- emit_kcfi(&prog, is_subprog);
+ emit_kcfi(&prog, hash);
break;

default:
@@ -383,7 +381,7 @@ static void emit_prologue(u8 **pprog, u3
{
u8 *prog = *pprog;

- emit_cfi(&prog, is_subprog);
+ emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
@@ -2596,20 +2594,27 @@ static int __arch_prepare_bpf_trampoline

prog = rw_image;

- EMIT_ENDBR();
- /*
- * This is the direct-call trampoline, as such it needs accounting
- * for the __fentry__ call.
- */
- x86_call_depth_emit_accounting(&prog, NULL);
+ if (flags & BPF_TRAMP_F_INDIRECT) {
+ /*
+ * Indirect call for bpf_struct_ops
+ */
+ emit_cfi(&prog, cfi_get_func_hash(func_addr));
+ } else {
+ /*
+ * Direct-call fentry stub, as such it needs accounting for the
+ * __fentry__ call.
+ */
+ x86_call_depth_emit_accounting(&prog, NULL);
+ }
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
- if (!is_imm8(stack_size))
+ if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
- else
+ } else {
/* sub rsp, stack_size */
EMIT4(0x48, 0x83, 0xEC, stack_size);
+ }
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
EMIT1(0x50); /* push rax */
/* mov QWORD PTR [rbp - rbx_off], rbx */
@@ -2643,10 +2648,11 @@ static int __arch_prepare_bpf_trampoline
}
}

- if (fentry->nr_links)
+ if (fentry->nr_links) {
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
return -EINVAL;
+ }

if (fmod_ret->nr_links) {
branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
@@ -2665,11 +2671,12 @@ static int __arch_prepare_bpf_trampoline
restore_regs(m, &prog, regs_off);
save_args(m, &prog, arg_stack_off, true);

- if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, restore the
* tail_call_cnt from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
+ }

if (flags & BPF_TRAMP_F_ORIG_STACK) {
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
@@ -2698,17 +2705,19 @@ static int __arch_prepare_bpf_trampoline
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
- for (i = 0; i < fmod_ret->nr_links; i++)
+ for (i = 0; i < fmod_ret->nr_links; i++) {
emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
image + (branches[i] - (u8 *)rw_image), X86_JNE);
+ }
}

- if (fexit->nr_links)
+ if (fexit->nr_links) {
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
false, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
+ }

if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_regs(m, &prog, regs_off);
@@ -2725,11 +2734,12 @@ static int __arch_prepare_bpf_trampoline
ret = -EINVAL;
goto cleanup;
}
- } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before running the original function, restore the
* tail_call_cnt from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
+ }

/* restore return value of orig_call or fentry prog back into RAX */
if (save_ret)
@@ -2737,9 +2747,10 @@ static int __arch_prepare_bpf_trampoline

emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
- if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ }
emit_return(&prog, image + (prog - (u8 *)rw_image));
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {

2023-12-08 13:41:49

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> The only problem I now have is the one XXX, I'm not entirely sure what
> signature to use there.

> @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> op_idx = prog->expected_attach_type;
> err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> &st_ops->func_models[op_idx],
> + /* XXX */ NULL,
> image, image + PAGE_SIZE);
> if (err < 0)
> goto out;

Duh, that should of course be something of dummy_ops_test_ret_fn type.
Let me go fix that.

2023-12-08 17:23:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 02:40:41PM +0100, Peter Zijlstra wrote:
> On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> > The only problem I now have is the one XXX, I'm not entirely sure what
> > signature to use there.
>
> > @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> > op_idx = prog->expected_attach_type;
> > err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> > &st_ops->func_models[op_idx],
> > + /* XXX */ NULL,
> > image, image + PAGE_SIZE);
> > if (err < 0)
> > goto out;
>
> Duh, that should ofcourse be something of dummy_ops_test_ret_fn type.
> Let me go fix that.

Next one.. bpf_obj_free_fields: field->kptr.dtor(xchg_field);

The one that trips is bpf_cgroup_release().

objtool doesn't think the address of that function 'escapes' and
'helpfully' seals that function, and then BPF thinks it does escape and
manages the above indirect call and *boom*.

How can I tell which functions escape according to BPF such that I might
teach objtool this?

2023-12-08 19:32:28

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 5:41 AM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> > The only problem I now have is the one XXX, I'm not entirely sure what
> > signature to use there.
>
> > @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> > op_idx = prog->expected_attach_type;
> > err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> > &st_ops->func_models[op_idx],
> > + /* XXX */ NULL,
> > image, image + PAGE_SIZE);
> > if (err < 0)
> > goto out;
>
> Duh, that should ofcourse be something of dummy_ops_test_ret_fn type.
> Let me go fix that.

Right. That should work.
A bit wasteful to generate real code just to read hash from it
via cfi_get_func_hash(), but it's a neat idea.
I guess it's hard to get kcfi from __ADDRESSABLE in plain C
and sprinkling asm("cfi_xxx: .long __kcfi_typeid..."); is worse?
Even if it's a macro ?
That macro would be used to define cfi_bpf_hash and all other stubs?

2023-12-08 19:41:30

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 9:22 AM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 02:40:41PM +0100, Peter Zijlstra wrote:
> > On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> > > The only problem I now have is the one XXX, I'm not entirely sure what
> > > signature to use there.
> >
> > > @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> > > op_idx = prog->expected_attach_type;
> > > err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> > > &st_ops->func_models[op_idx],
> > > + /* XXX */ NULL,
> > > image, image + PAGE_SIZE);
> > > if (err < 0)
> > > goto out;
> >
> > Duh, that should ofcourse be something of dummy_ops_test_ret_fn type.
> > Let me go fix that.
>
> Next one.. bpf_obj_free_fields: field->kptr.dtor(xchg_field);
>
> The one that trips is bpf_cgroup_release().
>
> objtool doesn't think the address of that function 'escapes' and
> 'helpfully' seals that function, and then BPF thinks it does escape and
> manages the above indirect call and *boom*.
>
> How can I tell which functions escape according to BPF such that I might
> teach objtool this?

I'm not following.
Are you asking to annotate
__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
somehow so that objtool knows that it will be called indirectly?
typedef void (*btf_dtor_kfunc_t)(void *);
btf_dtor_kfunc_t dtor;
but the bpf_cgroup_release takes 'struct cgroup*'.
From kcfi pov void * == struct cgroup * ?
Do we need to change it to 'void *cgrp' ?
What is "sealing" by objtool?

2023-12-08 20:19:19

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 11:32:07AM -0800, Alexei Starovoitov wrote:
> On Fri, Dec 8, 2023 at 5:41 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> > > The only problem I now have is the one XXX, I'm not entirely sure what
> > > signature to use there.
> >
> > > @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> > > op_idx = prog->expected_attach_type;
> > > err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> > > &st_ops->func_models[op_idx],
> > > + /* XXX */ NULL,
> > > image, image + PAGE_SIZE);
> > > if (err < 0)
> > > goto out;
> >
> > Duh, that should ofcourse be something of dummy_ops_test_ret_fn type.
> > Let me go fix that.
>
> Right. That should work.
> A bit wasteful to generate real code just to read hash from it
> via cfi_get_func_hash(), but it's a neat idea.

Right, bit wasteful. But the advantage is that I get a structure with
pointers that exactly mirrors the structure we're writing.

> I guess it's hard to get kcfi from __ADDRESSABLE in plain C
> and sprinkling asm("cfi_xxx: .long __kcfi_typeid..."); is worse?
> Even if it's a macro ?

I can try this, but I'm not sure it'll be pretty. Even if I wrap it in a
decent macro, I still get to define a ton of variables and then wrap the
lot into a structure -- one that expects function pointers.

I'll see how horrible it will become.

2023-12-08 20:28:16

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 11:40:27AM -0800, Alexei Starovoitov wrote:

> What is "sealing" by objtool?

Ah, an LTO-like pass that tries to determine if a function ever gets its
address taken.

The basic problem is that the compiler (barring its own LTO pass) must
emit CFI for every non-local symbol in a translation unit. This means
that a ton of functions will have CFI on, even if they're never
indirectly called.

So objtool collects all functions that have CFI but do not get their
address taken, and sticks their addresses in a .discard section; then at
boot time we iterate this section and scribble the CFI state for all
these functions, making them invalid to be called indirectly.

For one, this prevents malicious code from finding a function address in
the symbol table and indirectly calling it anyway as a means to
circumvent the EXPORT symbol restrictions.

So objtool does not think bpf_cgroup_release() gets its address taken;
specifically, it does not find its address in a section it knows about.
Hence it goes on the list, we scribble it, and the indirect call
goes *boom*.
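
Conceptually (the section and helper names below are purely
illustrative, not the actual objtool/alternative.c implementation), the
boot-time pass amounts to:

	#include <linux/init.h>
	#include <linux/types.h>

	extern s32 __unused_cfi_sites_start[], __unused_cfi_sites_end[];

	/* hypothetical helper: overwrite the CFI preamble / ENDBR so an
	 * indirect call to this function traps */
	extern void poison_cfi_preamble(void *addr);

	static void __init seal_unused_cfi(void)
	{
		s32 *s;

		for (s = __unused_cfi_sites_start; s < __unused_cfi_sites_end; s++) {
			void *addr = (void *)s + *s;	/* PC-relative address */

			poison_cfi_preamble(addr);
		}
	}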

2023-12-08 20:36:18

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 11:40:27AM -0800, Alexei Starovoitov wrote:

> typedef void (*btf_dtor_kfunc_t)(void *);
> btf_dtor_kfunc_t dtor;
> but the bpf_cgroup_release takes 'struct cgroup*'.
> From kcfi pov void * == struct cgroup * ?
> Do we need to change it to 'void *cgrp' ?

Yes, doing that naively like the below, gets me lovely things like:

validate_case:FAIL:expect_msg unexpected error: -22
VERIFIER LOG:
=============
=============
EXPECTED MSG: 'Possibly NULL pointer passed to trusted arg0'
#48/7 cgrp_kfunc/cgrp_kfunc_acquire_untrusted:FAIL
run_subtest:PASS:obj_open_mem 0 nsec
libbpf: extern (func ksym) 'bpf_cgroup_release': func_proto [148] incompatible with vmlinux [125610]
libbpf: failed to load object 'cgrp_kfunc_failure'


But let me try rebuilding everything..


---
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b3be5742d6f1..078b207af7f0 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2145,10 +2145,11 @@ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
* bpf_task_release - Release the reference acquired on a task.
* @p: The task on which a reference is being released.
*/
-__bpf_kfunc void bpf_task_release(struct task_struct *p)
+__bpf_kfunc void bpf_task_release(void *p)
{
put_task_struct_rcu_user(p);
}
+EXPORT_SYMBOL_GPL(bpf_task_release);

#ifdef CONFIG_CGROUPS
/**
@@ -2169,10 +2170,11 @@ __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
* drops to 0.
* @cgrp: The cgroup on which a reference is being released.
*/
-__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
+__bpf_kfunc void bpf_cgroup_release(void *cgrp)
{
cgroup_put(cgrp);
}
+EXPORT_SYMBOL_GPL(bpf_cgroup_release);

/**
* bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor

2023-12-08 20:41:22

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 12:35 PM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 11:40:27AM -0800, Alexei Starovoitov wrote:
>
> > typedef void (*btf_dtor_kfunc_t)(void *);
> > btf_dtor_kfunc_t dtor;
> > but the bpf_cgroup_release takes 'struct cgroup*'.
> > From kcfi pov void * == struct cgroup * ?
> > Do we need to change it to 'void *cgrp' ?
>
> Yes, doing that naively like the below, gets me lovely things like:
>
> validate_case:FAIL:expect_msg unexpected error: -22
> VERIFIER LOG:
> =============
> =============
> EXPECTED MSG: 'Possibly NULL pointer passed to trusted arg0'
> #48/7 cgrp_kfunc/cgrp_kfunc_acquire_untrusted:FAIL
> run_subtest:PASS:obj_open_mem 0 nsec
> libbpf: extern (func ksym) 'bpf_cgroup_release': func_proto [148] incompatible with vmlinux [125610]
> libbpf: failed to load object 'cgrp_kfunc_failure'
>
>
> But let me try rebuilding everything..
>
>
> ---
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b3be5742d6f1..078b207af7f0 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2145,10 +2145,11 @@ __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
> * bpf_task_release - Release the reference acquired on a task.
> * @p: The task on which a reference is being released.
> */
> -__bpf_kfunc void bpf_task_release(struct task_struct *p)
> +__bpf_kfunc void bpf_task_release(void *p)

Yeah. That won't work. We need a wrapper.
Since bpf prog is also calling it directly.
In progs/task_kfunc_common.h
void bpf_task_release(struct task_struct *p) __ksym;

then later both libbpf and the verifier check that
what bpf prog is calling actually matches the proto
of what is in the kernel.
Effectively we're doing strong prototype check at load time.

btw instead of EXPORT_SYMBOL_GPL(bpf_task_release)
can __ADDRESSABLE be used ?
Since it's not an export symbol.

2023-12-08 20:46:31

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 12:18 PM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 11:32:07AM -0800, Alexei Starovoitov wrote:
> > On Fri, Dec 8, 2023 at 5:41 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Fri, Dec 08, 2023 at 11:29:40AM +0100, Peter Zijlstra wrote:
> > > > The only problem I now have is the one XXX, I'm not entirely sure what
> > > > signature to use there.
> > >
> > > > @@ -119,6 +119,7 @@ int bpf_struct_ops_test_run(struct bpf_p
> > > > op_idx = prog->expected_attach_type;
> > > > err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> > > > &st_ops->func_models[op_idx],
> > > > + /* XXX */ NULL,
> > > > image, image + PAGE_SIZE);
> > > > if (err < 0)
> > > > goto out;
> > >
> > > Duh, that should ofcourse be something of dummy_ops_test_ret_fn type.
> > > Let me go fix that.
> >
> > Right. That should work.
> > A bit wasteful to generate real code just to read hash from it
> > via cfi_get_func_hash(), but it's a neat idea.
>
> Right, bit wasteful. But the advantage is that I get a structure with
> pointers that exactly mirrors the structure we're writing.
>
> > I guess it's hard to get kcfi from __ADDRESSABLE in plain C
> > and sprinkling asm("cfi_xxx: .long __kcfi_typeid..."); is worse?
> > Even if it's a macro ?
>
> I can try this, but I'm not sure it'll be pretty. Even if I wrap it in a
> decent macro, I still get to define a ton of variables and then wrap the
> lot into a structure -- one that expects function pointers.
>
> I'll see how horrible it will become.

I mean we don't need to store a pointer to a func in stubs.
Can it be, roughly:

extern void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked);
KCFI_MACRO(hash_of_cong_avoid, bpf_tcp_ca_cong_avoid);
u32 __array_of_kcfi_hash[] = {hash_of_cong_avoid, hash_of_set_state,...};
.bpf_ops_stubs = __array_of_kcfi_hash,

2023-12-08 20:53:23

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 12:41:03PM -0800, Alexei Starovoitov wrote:
> On Fri, Dec 8, 2023 at 12:35 PM Peter Zijlstra <[email protected]> wrote:

> > -__bpf_kfunc void bpf_task_release(struct task_struct *p)
> > +__bpf_kfunc void bpf_task_release(void *p)
>
> Yeah. That won't work. We need a wrapper.
> Since bpf prog is also calling it directly.
> In progs/task_kfunc_common.h
> void bpf_task_release(struct task_struct *p) __ksym;
>
> than later both libbpf and the verifier check that
> what bpf prog is calling actually matches the proto
> of what is in the kernel.
> Effectively we're doing strong prototype check at load time.

I'm still somewhat confused on how this works, where does BPF get the
address of the function from? and what should I call the wrapper?

> btw instead of EXPORT_SYMBOL_GPL(bpf_task_release)
> can __ADDRESSABLE be used ?
> Since it's not an export symbol.

No, __ADDRESSABLE() is expressly ignored, but we have IBT_NOSEAL() that
should do it. I'll rename the thing and lift it out of x86 to avoid
breaking all other arch builds.

2023-12-08 20:57:31

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 12:45:51PM -0800, Alexei Starovoitov wrote:

> I mean we don't need to store a pointer to a func in stubs.
> Can it be, roughly:
>
> extern void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked);
> KCFI_MACRO(hash_of_cong_avoid, bpf_tcp_ca_cong_avoid);
> u32 __array_of_kcfi_hash[] = {hash_of_cong_avoid, hash_of_set_state,...};
> .bpf_ops_stubs = __array_of_kcfi_hash,

But then how do I index this array? The bpf_ops_stubs thing having the
same layout as the target struct made it easy and we could use 'moff'
for both.

That also keeps working if someone adds a member to the struct or
moves some members around.

2023-12-08 20:58:24

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 12:52 PM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 12:41:03PM -0800, Alexei Starovoitov wrote:
> > On Fri, Dec 8, 2023 at 12:35 PM Peter Zijlstra <[email protected]> wrote:
>
> > > -__bpf_kfunc void bpf_task_release(struct task_struct *p)
> > > +__bpf_kfunc void bpf_task_release(void *p)
> >
> > Yeah. That won't work. We need a wrapper.
> > Since bpf prog is also calling it directly.
> > In progs/task_kfunc_common.h
> > void bpf_task_release(struct task_struct *p) __ksym;
> >
> > than later both libbpf and the verifier check that
> > what bpf prog is calling actually matches the proto
> > of what is in the kernel.
> > Effectively we're doing strong prototype check at load time.
>
> I'm still somewhat confused on how this works, where does BPF get the
> address of the function from? and what should I call the wrapper?

It starts with
register_btf_id_dtor_kfuncs() that takes a set of btf_ids:
{btf_id_of_type, btf_id_of_dtor_function}, ...

Then based on btf_id_of_dtor_function we find its type proto, name, do checks,
and eventually:
addr = kallsyms_lookup_name(dtor_func_name);
field->kptr.dtor = (void *)addr;

bpf_task_release(struct task_struct *p) would need to stay as-is,
but we can have a wrapper
void bpf_task_release_dtor(void *p)
{
bpf_task_release(p);
}

And adjust the above lookup with extra "_dtor" suffix.
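
For context, the registration side looks roughly like the existing dtor
sets in kernel/bpf/helpers.c; treat the entries below as an illustration
of the {btf_id_of_type, btf_id_of_dtor_function} pairing, with the
hypothetical _dtor wrapper from above slotted in:

	#include <linux/btf.h>
	#include <linux/btf_ids.h>

	BTF_ID_LIST(cgrp_dtor_ids)
	BTF_ID(struct, cgroup)
	BTF_ID(func, bpf_cgroup_release_dtor)	/* the void * wrapper above */

	static const struct btf_id_dtor_kfunc cgrp_dtors[] = {
		{
			.btf_id       = cgrp_dtor_ids[0],
			.kfunc_btf_id = cgrp_dtor_ids[1],
		},
	};

	/* from an initcall:
	 * err = register_btf_id_dtor_kfuncs(cgrp_dtors, ARRAY_SIZE(cgrp_dtors),
	 *				     THIS_MODULE);
	 */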

> > btw instead of EXPORT_SYMBOL_GPL(bpf_task_release)
> > can __ADDRESSABLE be used ?
> > Since it's not an export symbol.
>
> No __ADDRESSABLE() is expressly ignored, but we have IBT_NOSEAL() that
> should do it. I'll rename the thing and lift it out of x86 to avoid
> breaking all other arch builds.

Makes sense.

2023-12-08 21:04:49

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 12:56 PM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Dec 08, 2023 at 12:45:51PM -0800, Alexei Starovoitov wrote:
>
> > I mean we don't need to store a pointer to a func in stubs.
> > Can it be, roughly:
> >
> > extern void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked);
> > KCFI_MACRO(hash_of_cong_avoid, bpf_tcp_ca_cong_avoid);
> > u32 __array_of_kcfi_hash[] = {hash_of_cong_avoid, hash_of_set_state,...};
> > .bpf_ops_stubs = __array_of_kcfi_hash,
>
> But then how do I index this array? The bpf_ops_stubs thing having the
> same layout at the target struct made it easy and we could use 'moff'
> for both.
>
> That also remains working if someone adds a member to the struct or
> moves some members around.

I was thinking to just use moff/2, assuming the u32 array will have the same order,
but I missed the fact that init() and release() in tcp_congestion_ops
come after some variables.
And you're right, it's more fragile when things change in tcp_congestion_ops.
Storing a u32 hash as (void *) into function pointers is just ugly.
Let's go with your initial approach.

2023-12-08 22:47:16

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 08, 2023 at 12:58:01PM -0800, Alexei Starovoitov wrote:
> On Fri, Dec 8, 2023 at 12:52 PM Peter Zijlstra <[email protected]> wrote:
> >
> > On Fri, Dec 08, 2023 at 12:41:03PM -0800, Alexei Starovoitov wrote:
> > > On Fri, Dec 8, 2023 at 12:35 PM Peter Zijlstra <[email protected]> wrote:
> >
> > > > -__bpf_kfunc void bpf_task_release(struct task_struct *p)
> > > > +__bpf_kfunc void bpf_task_release(void *p)
> > >
> > > Yeah. That won't work. We need a wrapper.
> > > Since bpf prog is also calling it directly.
> > > In progs/task_kfunc_common.h
> > > void bpf_task_release(struct task_struct *p) __ksym;
> > >
> > > than later both libbpf and the verifier check that
> > > what bpf prog is calling actually matches the proto
> > > of what is in the kernel.
> > > Effectively we're doing strong prototype check at load time.
> >
> > I'm still somewhat confused on how this works, where does BPF get the
> > address of the function from? and what should I call the wrapper?
>
> It starts with
> register_btf_id_dtor_kfuncs() that takes a set of btf_ids:
> {btf_id_of_type, btf_id_of_dtor_function}, ...
>
> Then based on btf_id_of_dtor_function we find its type proto, name, do checks,
> and eventually:
> addr = kallsyms_lookup_name(dtor_func_name);
> field->kptr.dtor = (void *)addr;
>
> bpf_task_release(struct task_struct *p) would need to stay as-is,
> but we can have a wrapper
> void bpf_task_release_dtor(void *p)
> {
> bpf_task_release(p);
> }
>
> And adjust the above lookup with extra "_dtor" suffix.
>
> > > btw instead of EXPORT_SYMBOL_GPL(bpf_task_release)
> > > can __ADDRESSABLE be used ?
> > > Since it's not an export symbol.
> >
> > No __ADDRESSABLE() is expressly ignored, but we have IBT_NOSEAL() that
> > should do it. I'll rename the thing and lift it out of x86 to avoid
> > breaking all other arch builds.
>
> Makes sense.

Ok, did that. Current patches (on top of bpf-next) are here:

git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/cfi

(really should try and write better changelogs, but it's too late)

The test_progs thing still doesn't run to completion; the next problem
seems to be bpf_throw():

[ 247.720159] ? die+0xa4/0xd0
[ 247.720216] ? do_trap+0xa5/0x180
[ 247.720281] ? __cfi_bpf_prog_8ac473954ac6d431_F+0xd/0x10
[ 247.720368] ? __cfi_bpf_prog_8ac473954ac6d431_F+0xd/0x10
[ 247.720459] ? do_error_trap+0xba/0x120
[ 247.720525] ? __cfi_bpf_prog_8ac473954ac6d431_F+0xd/0x10
[ 247.720614] ? handle_invalid_op+0x2c/0x40
[ 247.720684] ? __cfi_bpf_prog_8ac473954ac6d431_F+0xd/0x10
[ 247.720775] ? exc_invalid_op+0x38/0x60
[ 247.720840] ? asm_exc_invalid_op+0x1a/0x20
[ 247.720909] ? 0xffffffffc001ba54
[ 247.720971] ? __cfi_bpf_prog_8ac473954ac6d431_F+0xd/0x10
[ 247.721063] ? bpf_throw+0x9b/0xf0
[ 247.721126] ? bpf_test_run+0x108/0x350
[ 247.721191] ? bpf_prog_5555714b685bf0cf_exception_throw_always_1+0x26/0x26
[ 247.721301] ? bpf_test_run+0x108/0x350
[ 247.721368] bpf_test_run+0x212/0x350
[ 247.721433] ? slab_build_skb+0x22/0x110
[ 247.721503] bpf_prog_test_run_skb+0x347/0x4a0

But I'm too tired to think straight. Is this a bpf_callback_t vs
bpf_exception_cb difference?

I'll prod more later. Zzzz..

2023-12-09 04:52:35

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/cfi,bpf: Fix BPF JIT call

On Fri, Dec 8, 2023 at 2:46 PM Peter Zijlstra <[email protected]> wrote:
>
>
> Ok, did that. Current patches (on top of bpf-next) are here:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/cfi

Looks really great. The last patch is cleaner than I expected. Good idea.

> (really should try and write better changelogs, but it's too late)

commit logs look fine except the "pilfer" word that I had to look up
in the dictionary :)

> [ 247.721063] ? bpf_throw+0x9b/0xf0
> [ 247.721126] ? bpf_test_run+0x108/0x350
> [ 247.721191] ? bpf_prog_5555714b685bf0cf_exception_throw_always_1+0x26/0x26
> [ 247.721301] ? bpf_test_run+0x108/0x350
> [ 247.721368] bpf_test_run+0x212/0x350
> [ 247.721433] ? slab_build_skb+0x22/0x110
> [ 247.721503] bpf_prog_test_run_skb+0x347/0x4a0
>
> But I'm too tired to think staight. Is this a bpf_callback_t vs
> bpf_exception_cb difference?

Yep.
It's easy to fix:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0e162eae8639..e36b3f41751e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1484,7 +1484,7 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
- unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
+ u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
#ifdef CONFIG_SECURITY
void *security;
#endif
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index fe229b28e4a9..650ebe8ff183 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2537,7 +2537,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
* which skips compiler generated instrumentation to do the same.
*/
kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
- ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
+ ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
WARN(1, "A call to BPF exception callback should never return\n");
}

and with that all of test_progs runs successfully without CFI panics.
*happy dance*

Only test_progs -t btf/line_info fails suspiciously.
There we check that line info embedded in the prog looks sane.
New cfi preamble is probably tripping something.
It could be a test issue. I'll investigate. It's not a blocker.

Do you mind resending the whole set so that BPF CI can test it
on different archs?