2024-02-29 19:44:10

by Puranjay Mohan

[permalink] [raw]
Subject: Re: arm64: Supporting DYNAMIC_FTRACE_WITH_CALL_OPS with CLANG_CFI

[email protected] writes:

> I would propose the following changes to the compiler that could fix this
> issue:
>
> 1. The kCFI hash should always be generated at func_addr - 4, this would
> make the calling code consistent.
>
> 2. The two(n) nops should be generated before the kCFI hash. We would
> modify the ftrace code to look for these nops at (fun_addr - 12) and
> (func_addr - 8) when CFI is enabled, and (func_addr - 8), (func_addr -
> 4) when CFI is disabled.
>
> The generated code could then look like:
>
> ffff80008033e9b0: d503201f nop
> ffff80008033e9b4: d503201f nop
> ffff80008033e9b8: 16c516ce kCFI hash
> ffff80008033e9bc <jump_label_cmp>:
> ffff80008033e9bc: d503245f bti c
> ffff80008033e9c0: d503201f nop
> ffff80008033e9c4: d503201f nop
> [.....]
>

I hacked some patches and tried the above approach:

Both CFI and DIRECT CALLS are enabled:

[root@localhost ~]# zcat /proc/config.gz | grep DIRECT_CALLS
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
[root@localhost ~]# zcat /proc/config.gz | grep CONFIG_CFI
CONFIG_CFI_CLANG=y
CONFIG_CFI_PERMISSIVE=y
CONFIG_CFI_WITHOUT_STATIC_CALL=y

Running some BPF selftests that use this feature.

/test_progs -a "*fentry*,*fexit*,tracing_struct" -b "fentry_test/fentry_many_args" -b "fexit_test/fexit_many_args"

#77 fentry_attach_btf_presence:OK
#78 fentry_fexit:OK
#79/1 fentry_test/fentry:OK
#79 fentry_test:OK
#80/1 fexit_bpf2bpf/target_no_callees:OK
#80/2 fexit_bpf2bpf/target_yes_callees:OK
#80/3 fexit_bpf2bpf/func_replace:OK
#80/4 fexit_bpf2bpf/func_replace_verify:OK
#80/5 fexit_bpf2bpf/func_sockmap_update:OK
#80/6 fexit_bpf2bpf/func_replace_return_code:OK
#80/7 fexit_bpf2bpf/func_map_prog_compatibility:OK
#80/8 fexit_bpf2bpf/func_replace_unreliable:OK
#80/9 fexit_bpf2bpf/func_replace_multi:OK
#80/10 fexit_bpf2bpf/fmod_ret_freplace:OK
#80/11 fexit_bpf2bpf/func_replace_global_func:OK
#80/12 fexit_bpf2bpf/fentry_to_cgroup_bpf:OK
#80/13 fexit_bpf2bpf/func_replace_progmap:OK
#80 fexit_bpf2bpf:OK
#81 fexit_sleep:OK
#82 fexit_stress:OK
#83/1 fexit_test/fexit:OK
#83 fexit_test:OK
#158 module_fentry_shadow:OK
#193/1 recursive_fentry/attach:OK
#193/2 recursive_fentry/load:OK
#193/3 recursive_fentry/detach:OK
#193 recursive_fentry:OK
#385 tracing_struct:OK
Summary: 10/18 PASSED, 0 SKIPPED, 0 FAILED

Running basic ftrace selftest:

[root@localhost ftrace]# ./ftracetest test.d/00basic/
=== Ftrace unit tests ===
[1] Basic trace file check [PASS]
[2] Basic test for tracers [PASS]
[3] Basic trace clock test [PASS]
[4] Basic event tracing check [PASS]
[5] Change the ringbuffer size [PASS]
[6] Change the ringbuffer sub-buffer size [PASS]
[7] Snapshot and tracing setting [PASS]
[8] Snapshot and tracing_cpumask [PASS]
[9] Test file and directory owership changes for eventfs [PASS]
[10] Basic tests on writing to trace_marker [PASS]
[11] trace_pipe and trace_marker [PASS]
[12] (instance) Basic test for tracers [PASS]
[13] (instance) Basic trace clock test [PASS]
[14] (instance) Change the ringbuffer size [PASS]
[15] (instance) Change the ringbuffer sub-buffer size [PASS]
[16] (instance) Snapshot and tracing setting [PASS]
[17] (instance) Snapshot and tracing_cpumask [PASS]
[18] (instance) Basic tests on writing to trace_marker [PASS]
[19] (instance) trace_pipe and trace_marker [PASS]


# of passed: 19
# of failed: 0
# of unresolved: 0
# of untested: 0
# of unsupported: 0
# of xfailed: 0
# of undefined(test bug): 0

Here are the patches(RFC) that I created:

LLVM Patch:

--- >8 ---
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e89a1c26c..f1ca33c26 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -982,9 +982,6 @@ void AsmPrinter::emitFunctionHeader() {
}
}

- // Emit KCFI type information before patchable-function-prefix nops.
- emitKCFITypeId(*MF);
-
// Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily
// place prefix data before NOPs.
unsigned PatchableFunctionPrefix = 0;
@@ -1006,6 +1003,9 @@ void AsmPrinter::emitFunctionHeader() {
CurrentPatchableFunctionEntrySym = CurrentFnBegin;
}

+ // Emit KCFI type information after patchable-function-prefix nops.
+ emitKCFITypeId(*MF);
+
// Emit the function prologue data for the indirect call sanitizer.
if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
assert(MD->getNumOperands() == 2);
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 4fa719ad6..678a38a6a 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -474,20 +474,11 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
assert(ScratchRegs[0] != AddrReg && ScratchRegs[1] != AddrReg &&
"Invalid scratch registers for KCFI_CHECK");

- // Adjust the offset for patchable-function-prefix. This assumes that
- // patchable-function-prefix is the same for all functions.
- int64_t PrefixNops = 0;
- (void)MI.getMF()
- ->getFunction()
- .getFnAttribute("patchable-function-prefix")
- .getValueAsString()
- .getAsInteger(10, PrefixNops);
-
// Load the target function type hash.
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDURWi)
.addReg(ScratchRegs[0])
.addReg(AddrReg)
- .addImm(-(PrefixNops * 4 + 4)));
+ .addImm(-4));
}

// Load the expected type hash.
--- 8< ---

Kernel Patch:

--- >8 ---
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435..979c290e6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -197,7 +197,7 @@ config ARM64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \
if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS
select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
- if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \
+ if (DYNAMIC_FTRACE_WITH_ARGS && \
!CC_OPTIMIZE_FOR_SIZE)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index f0c16640e..9be421583 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -48,8 +48,15 @@ SYM_CODE_START(ftrace_caller)
* alignment first as this allows us to fold the subtraction into the
* LDR.
*/
+
+#ifdef CONFIG_CFI_CLANG
+ sub x11, x30, #20
+ bic x11, x11, 0x7
+ ldr x11, [x11, 0] // op
+#else
bic x11, x30, 0x7
ldr x11, [x11, #-(4 * AARCH64_INSN_SIZE)] // op
+#endif

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11..ed7a86b31 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -125,6 +125,10 @@ unsigned long ftrace_call_adjust(unsigned long addr)
/* Skip the NOPs placed before the function entry point */
addr += 2 * AARCH64_INSN_SIZE;

+ /* Skip the hash in case of CLANG_CFI */
+ if (IS_ENABLED(CONFIG_CFI_CLANG))
+ addr += AARCH64_INSN_SIZE;
+
/* Skip any BTI */
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
u32 insn = le32_to_cpu(*(__le32 *)addr);
@@ -299,7 +303,11 @@ static const struct ftrace_ops *arm64_rec_get_ops(struct dyn_ftrace *rec)
static int ftrace_rec_set_ops(const struct dyn_ftrace *rec,
const struct ftrace_ops *ops)
{
+#ifdef CONFIG_CFI_CLANG
+ unsigned long literal = ALIGN_DOWN(rec->ip - 16, 8);
+#elif
unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+#endif
return aarch64_insn_write_literal_u64((void *)literal,
(unsigned long)ops);
}
--- 8< ---

I applied the kernel patch over my tree that has patches to fix kCFI
with BPF and also has a static call related fix [1].


Thanks,
Puranjay

[1] https://github.com/puranjaymohan/linux/tree/kcfi_bpf