2021-08-10 15:48:51

by liuqi (BA)

[permalink] [raw]
Subject: [PATCH v3 0/2] arm64: Enable OPTPROBE for arm64

This patch introduces optprobe for ARM64, using a branch instruction
to replace the probed instruction.

The test results on the Hip08 platform are shown below; optprobe
reduces the latency to about 1/4 of that of a normal kprobe.

kprobe before optimization:
[280709.846380] do_empty returned 0 and took 1530 ns to execute
[280709.852057] do_empty returned 0 and took 550 ns to execute
[280709.857631] do_empty returned 0 and took 440 ns to execute
[280709.863215] do_empty returned 0 and took 380 ns to execute
[280709.868787] do_empty returned 0 and took 360 ns to execute
[280709.874362] do_empty returned 0 and took 340 ns to execute
[280709.879936] do_empty returned 0 and took 320 ns to execute
[280709.885505] do_empty returned 0 and took 300 ns to execute
[280709.891075] do_empty returned 0 and took 280 ns to execute
[280709.896646] do_empty returned 0 and took 290 ns to execute
[280709.902220] do_empty returned 0 and took 290 ns to execute
[280709.907807] do_empty returned 0 and took 290 ns to execute

optprobe:
[ 2965.964572] do_empty returned 0 and took 90 ns to execute
[ 2965.969952] do_empty returned 0 and took 80 ns to execute
[ 2965.975332] do_empty returned 0 and took 70 ns to execute
[ 2965.980714] do_empty returned 0 and took 60 ns to execute
[ 2965.986128] do_empty returned 0 and took 80 ns to execute
[ 2965.991507] do_empty returned 0 and took 70 ns to execute
[ 2965.996884] do_empty returned 0 and took 70 ns to execute
[ 2966.002262] do_empty returned 0 and took 80 ns to execute
[ 2966.007642] do_empty returned 0 and took 70 ns to execute
[ 2966.013020] do_empty returned 0 and took 70 ns to execute
[ 2966.018400] do_empty returned 0 and took 70 ns to execute
[ 2966.023779] do_empty returned 0 and took 70 ns to execute
[ 2966.029158] do_empty returned 0 and took 70 ns to execute

Changes since V2:
- Address the comments from Masami, prepare another writable buffer in
arch_prepare_optimized_kprobe() and build the trampoline code on it.
- Address the comments from Amit, move save_all_base_regs and
restore_all_base_regs to <asm/assembler.h>, as these two macros are reused
in optprobe.
- Link: https://lore.kernel.org/lkml/[email protected]/

Changes since V1:
- Address the comments from Masami, check for all branch instructions, and
use aarch64_insn_patch_text_nosync() instead of aarch64_insn_patch_text()
in each probe.
- Link: https://lore.kernel.org/lkml/[email protected]/

Qi Liu (2):
arm64: assembler: Make save_all_base_regs and restore_all_base_regs common macros
arm64: kprobe: Enable OPTPROBE for arm64

arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/assembler.h | 52 ++++
arch/arm64/include/asm/kprobes.h | 24 ++
arch/arm64/kernel/probes/Makefile | 2 +
arch/arm64/kernel/probes/kprobes.c | 19 +-
arch/arm64/kernel/probes/kprobes_trampoline.S | 52 ----
arch/arm64/kernel/probes/opt_arm64.c | 239 ++++++++++++++++++
.../arm64/kernel/probes/optprobe_trampoline.S | 37 +++
8 files changed, 371 insertions(+), 55 deletions(-)
create mode 100644 arch/arm64/kernel/probes/opt_arm64.c
create mode 100644 arch/arm64/kernel/probes/optprobe_trampoline.S

--
2.17.1


2021-08-10 15:48:51

by liuqi (BA)

[permalink] [raw]
Subject: [PATCH v3 1/2] arm64: assembler: Make save_all_base_regs and restore_all_base_regs common macros

Move save_all_base_regs and restore_all_base_regs to <asm/assembler.h>,
as these two macros can be reused in optprobe.

Signed-off-by: Qi Liu <[email protected]>
---
arch/arm64/include/asm/assembler.h | 52 +++++++++++++++++++
arch/arm64/kernel/probes/kprobes_trampoline.S | 52 -------------------
2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 89faca0e740d..cd912810fc80 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -515,6 +515,58 @@ alternative_endif
.pushsection "_kprobe_blacklist", "aw"; \
.quad x; \
.popsection;
+
+ .macro save_all_base_regs
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ add x0, sp, #PT_REGS_SIZE
+ stp lr, x0, [sp, #S_LR]
+ /*
+ * Construct a useful saved PSTATE
+ */
+ mrs x0, nzcv
+ mrs x1, daif
+ orr x0, x0, x1
+ mrs x1, CurrentEL
+ orr x0, x0, x1
+ mrs x1, SPSel
+ orr x0, x0, x1
+ stp xzr, x0, [sp, #S_PC]
+ .endm
+
+ .macro restore_all_base_regs
+ ldr x0, [sp, #S_PSTATE]
+ and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
+ msr nzcv, x0
+ ldp x0, x1, [sp, #S_X0]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldp x8, x9, [sp, #S_X8]
+ ldp x10, x11, [sp, #S_X10]
+ ldp x12, x13, [sp, #S_X12]
+ ldp x14, x15, [sp, #S_X14]
+ ldp x16, x17, [sp, #S_X16]
+ ldp x18, x19, [sp, #S_X18]
+ ldp x20, x21, [sp, #S_X20]
+ ldp x22, x23, [sp, #S_X22]
+ ldp x24, x25, [sp, #S_X24]
+ ldp x26, x27, [sp, #S_X26]
+ ldp x28, x29, [sp, #S_X28]
+ .endm
#else
#define NOKPROBE(x)
#endif
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 288a84e253cc..2463d5d0e004 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -9,58 +9,6 @@

.text

- .macro save_all_base_regs
- stp x0, x1, [sp, #S_X0]
- stp x2, x3, [sp, #S_X2]
- stp x4, x5, [sp, #S_X4]
- stp x6, x7, [sp, #S_X6]
- stp x8, x9, [sp, #S_X8]
- stp x10, x11, [sp, #S_X10]
- stp x12, x13, [sp, #S_X12]
- stp x14, x15, [sp, #S_X14]
- stp x16, x17, [sp, #S_X16]
- stp x18, x19, [sp, #S_X18]
- stp x20, x21, [sp, #S_X20]
- stp x22, x23, [sp, #S_X22]
- stp x24, x25, [sp, #S_X24]
- stp x26, x27, [sp, #S_X26]
- stp x28, x29, [sp, #S_X28]
- add x0, sp, #PT_REGS_SIZE
- stp lr, x0, [sp, #S_LR]
- /*
- * Construct a useful saved PSTATE
- */
- mrs x0, nzcv
- mrs x1, daif
- orr x0, x0, x1
- mrs x1, CurrentEL
- orr x0, x0, x1
- mrs x1, SPSel
- orr x0, x0, x1
- stp xzr, x0, [sp, #S_PC]
- .endm
-
- .macro restore_all_base_regs
- ldr x0, [sp, #S_PSTATE]
- and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
- msr nzcv, x0
- ldp x0, x1, [sp, #S_X0]
- ldp x2, x3, [sp, #S_X2]
- ldp x4, x5, [sp, #S_X4]
- ldp x6, x7, [sp, #S_X6]
- ldp x8, x9, [sp, #S_X8]
- ldp x10, x11, [sp, #S_X10]
- ldp x12, x13, [sp, #S_X12]
- ldp x14, x15, [sp, #S_X14]
- ldp x16, x17, [sp, #S_X16]
- ldp x18, x19, [sp, #S_X18]
- ldp x20, x21, [sp, #S_X20]
- ldp x22, x23, [sp, #S_X22]
- ldp x24, x25, [sp, #S_X24]
- ldp x26, x27, [sp, #S_X26]
- ldp x28, x29, [sp, #S_X28]
- .endm
-
SYM_CODE_START(kretprobe_trampoline)
sub sp, sp, #PT_REGS_SIZE

--
2.17.1