2024-02-04 03:46:50

by Jinghao Jia

Subject: [PATCH v2 0/3] x86/kprobes: add exception opcode detector and boost more opcodes

Hi everyone,

This patch set makes the following 3 changes:

- It refactors the can_probe and can_boost functions to return bool instead
of int. Both functions only use their int return value as a boolean, so
let's make them return a real bool.

- It adds an exception opcode detector to prevent kprobing on INTs and UDs.
These opcodes serve special purposes in the kernel, and kprobing them
also pollutes the stack trace with the copy buffer address. This was
suggested by Masami.

- At the same time, this patch set boosts more opcodes from groups 2/3/4/5.
The newly boosted opcodes are all arithmetic instructions with semantics
that are easy to reason about, so they can be boosted and executed
out-of-line. These instructions were not boosted previously because they
use opcode extensions that the kernel did not handle. With the instruction
decoder they can now be handled easily. Boosting (and further
jump-optimizing) these instructions yields a 10x performance gain for a
single probe on QEMU (a minimal measurement sketch follows below).
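
The measurement boils down to hitting a kprobe with an empty pre-handler in
a tight loop. A minimal sketch of such a probe module is shown below; it is
not part of this series, and the probed symbol is only an illustrative
assumption -- any probeable function containing a newly boostable
instruction would do:

/*
 * Minimal sketch (not part of this series): a kprobe with an empty
 * pre-handler, the kind of probe used for the QEMU measurement above.
 * The probed symbol is an illustrative assumption.
 */
#include <linux/module.h>
#include <linux/kprobes.h>

static int empty_pre(struct kprobe *p, struct pt_regs *regs)
{
        return 0;       /* do nothing; only the probe overhead is measured */
}

static struct kprobe kp = {
        .symbol_name    = "kernel_clone",       /* example target */
        .pre_handler    = empty_pre,
};

static int __init boost_bench_init(void)
{
        return register_kprobe(&kp);
}

static void __exit boost_bench_exit(void)
{
        unregister_kprobe(&kp);
}

module_init(boost_bench_init);
module_exit(boost_bench_exit);
MODULE_LICENSE("GPL");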

Changelog:
---
v1 -> v2
v1: https://lore.kernel.org/linux-trace-kernel/[email protected]/

- Address feedback from Xin:
- Change return type of is_exception_insn from int to bool.

- Address feedback from Masami:
- Improve code style in is_exception_insn.
- Move instruction boundary check of the target address (addr == paddr)
right after the decoding loop to avoid decoding if the target address
is not a valid instruction boundary.
- Document instruction encoding differences between AMD and Intel for
instruction group 2 and 3 in can_boost.

- Add an extra patch to change the return type of can_probe and can_boost
from int to bool based on v1 discussion.

- Improve code comments in general.

Jinghao Jia (3):
x86/kprobes: Refactor can_{probe,boost} return type to bool
x86/kprobes: Prohibit kprobing on INT and UD
x86/kprobes: Boost more instructions from grp2/3/4/5

arch/x86/kernel/kprobes/common.h | 2 +-
arch/x86/kernel/kprobes/core.c | 98 ++++++++++++++++++++++----------
2 files changed, 69 insertions(+), 31 deletions(-)

--
2.43.0



2024-02-04 03:58:34

by Jinghao Jia

Subject: [PATCH v2 1/3] x86/kprobes: Refactor can_{probe,boost} return type to bool

Both can_probe and can_boost have an int return type but use it as a
boolean in their context.

Refactor both functions to actually return bool.

Signed-off-by: Jinghao Jia <[email protected]>
---
arch/x86/kernel/kprobes/common.h | 2 +-
arch/x86/kernel/kprobes/core.c | 33 +++++++++++++++-----------------
2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index c993521d4933..e772276f5aa9 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -78,7 +78,7 @@
#endif

/* Ensure if the instruction can be boostable */
-extern int can_boost(struct insn *insn, void *orig_addr);
+extern bool can_boost(struct insn *insn, void *orig_addr);
/* Recover instruction if given address is probed */
extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
unsigned long addr);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index e8babebad7b8..644d416441fb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -137,14 +137,14 @@ NOKPROBE_SYMBOL(synthesize_relcall);
* Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
-int can_boost(struct insn *insn, void *addr)
+bool can_boost(struct insn *insn, void *addr)
{
kprobe_opcode_t opcode;
insn_byte_t prefix;
int i;

if (search_exception_tables((unsigned long)addr))
- return 0; /* Page fault may occur on this address. */
+ return false; /* Page fault may occur on this address. */

/* 2nd-byte opcode */
if (insn->opcode.nbytes == 2)
@@ -152,7 +152,7 @@ int can_boost(struct insn *insn, void *addr)
(unsigned long *)twobyte_is_boostable);

if (insn->opcode.nbytes != 1)
- return 0;
+ return false;

for_each_insn_prefix(insn, i, prefix) {
insn_attr_t attr;
@@ -160,7 +160,7 @@ int can_boost(struct insn *insn, void *addr)
attr = inat_get_opcode_attribute(prefix);
/* Can't boost Address-size override prefix and CS override prefix */
if (prefix == 0x2e || inat_is_address_size_prefix(attr))
- return 0;
+ return false;
}

opcode = insn->opcode.bytes[0];
@@ -181,12 +181,12 @@ int can_boost(struct insn *insn, void *addr)
case 0xf6 ... 0xf7: /* Grp3 */
case 0xfe: /* Grp4 */
/* ... are not boostable */
- return 0;
+ return false;
case 0xff: /* Grp5 */
/* Only indirect jmp is boostable */
return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
default:
- return 1;
+ return true;
}
}

@@ -253,20 +253,18 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
}

/* Check if paddr is at an instruction boundary */
-static int can_probe(unsigned long paddr)
+static bool can_probe(unsigned long paddr)
{
unsigned long addr, __addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];

if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
- return 0;
+ return false;

/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
- int ret;
-
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
@@ -277,11 +275,10 @@ static int can_probe(unsigned long paddr)
*/
__addr = recover_probed_instruction(buf, addr);
if (!__addr)
- return 0;
+ return false;

- ret = insn_decode_kernel(&insn, (void *)__addr);
- if (ret < 0)
- return 0;
+ if (insn_decode_kernel(&insn, (void *)__addr) < 0)
+ return false;

#ifdef CONFIG_KGDB
/*
@@ -290,7 +287,7 @@ static int can_probe(unsigned long paddr)
*/
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
kgdb_has_hit_break(addr))
- return 0;
+ return false;
#endif
addr += insn.length;
}
@@ -310,10 +307,10 @@ static int can_probe(unsigned long paddr)
*/
__addr = recover_probed_instruction(buf, addr);
if (!__addr)
- return 0;
+ return false;

if (insn_decode_kernel(&insn, (void *)__addr) < 0)
- return 0;
+ return false;

if (insn.opcode.value == 0xBA)
offset = 12;
@@ -324,7 +321,7 @@ static int can_probe(unsigned long paddr)

/* This movl/addl is used for decoding CFI. */
if (is_cfi_trap(addr + offset))
- return 0;
+ return false;
}

out:
--
2.43.0


2024-02-04 04:45:38

by Jinghao Jia

Subject: [PATCH v2 3/3] x86/kprobes: Boost more instructions from grp2/3/4/5

With the instruction decoder, we are now able to decode and recognize
instructions with opcode extensions. There are more instructions in
these groups that can be boosted:

Group 2: ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
Group 3: TEST, NOT, NEG, MUL, IMUL, DIV, IDIV
Group 4: INC, DEC (byte operation)
Group 5: INC, DEC (word/doubleword/quadword operation)

These instructions were not boosted previously because the groups contain
reserved opcodes, e.g., group 2 with ModR/M.nnn == 110 is unmapped. As a
result, kprobes attached to them require two int3 traps, since being
non-boostable also prevents jump-optimization.
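
For reference, the "nnn" opcode extension is simply bits 5:3 of the ModR/M
byte, i.e. what the X86_MODRM_REG() helper used in can_boost() extracts.
A standalone illustration (not part of the patch):

/* Illustration only: extract the opcode-extension (reg/"nnn") field. */
static inline unsigned int modrm_nnn(unsigned char modrm)
{
        return (modrm >> 3) & 0x7;      /* bits 5:3 select the group member */
}

/*
 * Example: a ModR/M byte of 0xf0 is 11 110 000b, so modrm_nnn() returns
 * 6 (0b110) -- the slot that is SHL/SAL on AMD but reserved on Intel in
 * group 2, which is exactly the case can_boost() has to exclude.
 */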

Some simple tests on QEMU show that after boosting and jump-optimization
a single kprobe on these instructions with an empty pre-handler runs 10x
faster (~1000 cycles vs. ~100 cycles).

Since these instructions are mostly ALU operations and do not touch
special registers like RIP, let's boost them so that we get the
performance benefit.

Signed-off-by: Jinghao Jia <[email protected]>
---
arch/x86/kernel/kprobes/core.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7a08d6a486c8..530f6d4b34f4 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -169,22 +169,33 @@ bool can_boost(struct insn *insn, void *addr)
case 0x62: /* bound */
case 0x70 ... 0x7f: /* Conditional jumps */
case 0x9a: /* Call far */
- case 0xc0 ... 0xc1: /* Grp2 */
case 0xcc ... 0xce: /* software exceptions */
- case 0xd0 ... 0xd3: /* Grp2 */
case 0xd6: /* (UD) */
case 0xd8 ... 0xdf: /* ESC */
case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
case 0xe8 ... 0xe9: /* near Call, JMP */
case 0xeb: /* Short JMP */
case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
- case 0xf6 ... 0xf7: /* Grp3 */
- case 0xfe: /* Grp4 */
/* ... are not boostable */
return false;
+ case 0xc0 ... 0xc1: /* Grp2 */
+ case 0xd0 ... 0xd3: /* Grp2 */
+ /*
+ * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved.
+ */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110;
+ case 0xf6 ... 0xf7: /* Grp3 */
+ /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001;
+ case 0xfe: /* Grp4 */
+ /* Only INC and DEC are boostable */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001;
case 0xff: /* Grp5 */
- /* Only indirect jmp is boostable */
- return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
+ /* Only INC, DEC, and indirect JMP are boostable */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100;
default:
return true;
}
--
2.43.0


2024-02-04 05:23:10

by Jinghao Jia

Subject: [PATCH v2 2/3] x86/kprobes: Prohibit kprobing on INT and UD

Both INT (INT n, INT1, INT3, INTO) and UD (UD0, UD1, UD2) serve special
purposes in the kernel, e.g., INT3 is used by KGDB and UD2 is involved
in LLVM-KCFI instrumentation. At the same time, attaching kprobes to
these instructions (particularly UD) pollutes the stack trace dumped in
the kernel ring buffer, since the exception is triggered in the copy
buffer rather than at the original location.

Check for INT and UD in can_probe and reject any kprobes trying to
attach to these instructions.
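
With this check in place, registering a kprobe whose resolved address
decodes to one of these opcodes fails at registration time. A hedged usage
sketch (the symbol name and offset below are made-up examples; the exact
errno is whatever arch_prepare_kprobe() propagates):

#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
        .symbol_name    = "function_containing_ud2",    /* hypothetical */
        .offset         = 0x10,         /* hypothetical offset of the ud2 */
};

static int __init try_probe_init(void)
{
        int ret = register_kprobe(&kp);

        if (ret < 0)    /* expected: can_probe() now rejects the UD2 */
                pr_info("kprobe rejected: %d\n", ret);
        else
                unregister_kprobe(&kp);
        return 0;
}

static void __exit try_probe_exit(void) { }

module_init(try_probe_init);
module_exit(try_probe_exit);
MODULE_LICENSE("GPL");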

Suggested-by: Masami Hiramatsu (Google) <[email protected]>
Signed-off-by: Jinghao Jia <[email protected]>
---
arch/x86/kernel/kprobes/core.c | 48 +++++++++++++++++++++++++++-------
1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 644d416441fb..7a08d6a486c8 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -252,7 +252,28 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
return __recover_probed_insn(buf, addr);
}

-/* Check if paddr is at an instruction boundary */
+/* Check if insn is INT or UD */
+static inline bool is_exception_insn(struct insn *insn)
+{
+ /* UD uses 0f escape */
+ if (insn->opcode.bytes[0] == 0x0f) {
+ /* UD0 / UD1 / UD2 */
+ return insn->opcode.bytes[1] == 0xff ||
+ insn->opcode.bytes[1] == 0xb9 ||
+ insn->opcode.bytes[1] == 0x0b;
+ }
+
+ /* INT3 / INT n / INTO / INT1 */
+ return insn->opcode.bytes[0] == 0xcc ||
+ insn->opcode.bytes[0] == 0xcd ||
+ insn->opcode.bytes[0] == 0xce ||
+ insn->opcode.bytes[0] == 0xf1;
+}
+
+/*
+ * Check if paddr is at an instruction boundary and that instruction can
+ * be probed
+ */
static bool can_probe(unsigned long paddr)
{
unsigned long addr, __addr, offset = 0;
@@ -291,6 +312,22 @@ static bool can_probe(unsigned long paddr)
#endif
addr += insn.length;
}
+
+ /* Check if paddr is at an instruction boundary */
+ if (addr != paddr)
+ return false;
+
+ __addr = recover_probed_instruction(buf, addr);
+ if (!__addr)
+ return false;
+
+ if (insn_decode_kernel(&insn, (void *)__addr) < 0)
+ return false;
+
+ /* INT and UD are special and should not be kprobed */
+ if (is_exception_insn(&insn))
+ return false;
+
if (IS_ENABLED(CONFIG_CFI_CLANG)) {
/*
* The compiler generates the following instruction sequence
@@ -305,13 +342,6 @@ static bool can_probe(unsigned long paddr)
* Also, these movl and addl are used for showing expected
* type. So those must not be touched.
*/
- __addr = recover_probed_instruction(buf, addr);
- if (!__addr)
- return false;
-
- if (insn_decode_kernel(&insn, (void *)__addr) < 0)
- return false;
-
if (insn.opcode.value == 0xBA)
offset = 12;
else if (insn.opcode.value == 0x3)
@@ -325,7 +355,7 @@ static bool can_probe(unsigned long paddr)
}

out:
- return (addr == paddr);
+ return true;
}

/* If x86 supports IBT (ENDBR) it must be skipped. */
--
2.43.0


2024-02-04 12:09:45

by Masami Hiramatsu

Subject: Re: [PATCH v2 3/3] x86/kprobes: Boost more instructions from grp2/3/4/5

On Sat, 3 Feb 2024 21:13:00 -0600
Jinghao Jia <[email protected]> wrote:

> With the instruction decoder, we are now able to decode and recognize
> instructions with opcode extensions. There are more instructions in
> these groups that can be boosted:
>
> Group 2: ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
> Group 3: TEST, NOT, NEG, MUL, IMUL, DIV, IDIV
> Group 4: INC, DEC (byte operation)
> Group 5: INC, DEC (word/doubleword/quadword operation)
>
> These instructions are not boosted previously because there are reserved
> opcodes within the groups, e.g., group 2 with ModR/M.nnn == 110 is
> unmapped. As a result, kprobes attached to them requires two int3 traps
> as being non-boostable also prevents jump-optimization.
>
> Some simple tests on QEMU show that after boosting and jump-optimization
> a single kprobe on these instructions with an empty pre-handler runs 10x
> faster (~1000 cycles vs. ~100 cycles).
>
> Since these instructions are mostly ALU operations and do not touch
> special registers like RIP, let's boost them so that we get the
> performance benefit.
>

This looks good to me. And can you check how many instructions in the
vmlinux will be covered by this change typically?

Thank you,

> Signed-off-by: Jinghao Jia <[email protected]>
> ---
> arch/x86/kernel/kprobes/core.c | 23 +++++++++++++++++------
> 1 file changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 7a08d6a486c8..530f6d4b34f4 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -169,22 +169,33 @@ bool can_boost(struct insn *insn, void *addr)
> case 0x62: /* bound */
> case 0x70 ... 0x7f: /* Conditional jumps */
> case 0x9a: /* Call far */
> - case 0xc0 ... 0xc1: /* Grp2 */
> case 0xcc ... 0xce: /* software exceptions */
> - case 0xd0 ... 0xd3: /* Grp2 */
> case 0xd6: /* (UD) */
> case 0xd8 ... 0xdf: /* ESC */
> case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
> case 0xe8 ... 0xe9: /* near Call, JMP */
> case 0xeb: /* Short JMP */
> case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
> - case 0xf6 ... 0xf7: /* Grp3 */
> - case 0xfe: /* Grp4 */
> /* ... are not boostable */
> return false;
> + case 0xc0 ... 0xc1: /* Grp2 */
> + case 0xd0 ... 0xd3: /* Grp2 */
> + /*
> + * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved.
> + */
> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110;
> + case 0xf6 ... 0xf7: /* Grp3 */
> + /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */
> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001;
> + case 0xfe: /* Grp4 */
> + /* Only INC and DEC are boostable */
> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001;
> case 0xff: /* Grp5 */
> - /* Only indirect jmp is boostable */
> - return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
> + /* Only INC, DEC, and indirect JMP are boostable */
> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 ||
> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100;
> default:
> return true;
> }
> --
> 2.43.0
>


--
Masami Hiramatsu (Google) <[email protected]>

2024-02-04 12:10:55

by Masami Hiramatsu

Subject: Re: [PATCH v2 1/3] x86/kprobes: Refactor can_{probe,boost} return type to bool

On Sat, 3 Feb 2024 21:12:58 -0600
Jinghao Jia <[email protected]> wrote:

> Both can_probe and can_boost have int return type but are using int as
> boolean in their context.
>
> Refactor both functions to make them actually return boolean.
>

This and the next one look good to me.

Acked-by: Masami Hiramatsu (Google) <[email protected]>

Let me pick those.

> Signed-off-by: Jinghao Jia <[email protected]>
> ---
> arch/x86/kernel/kprobes/common.h | 2 +-
> arch/x86/kernel/kprobes/core.c | 33 +++++++++++++++-----------------
> 2 files changed, 16 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
> index c993521d4933..e772276f5aa9 100644
> --- a/arch/x86/kernel/kprobes/common.h
> +++ b/arch/x86/kernel/kprobes/common.h
> @@ -78,7 +78,7 @@
> #endif
>
> /* Ensure if the instruction can be boostable */
> -extern int can_boost(struct insn *insn, void *orig_addr);
> +extern bool can_boost(struct insn *insn, void *orig_addr);
> /* Recover instruction if given address is probed */
> extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
> unsigned long addr);
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index e8babebad7b8..644d416441fb 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -137,14 +137,14 @@ NOKPROBE_SYMBOL(synthesize_relcall);
> * Returns non-zero if INSN is boostable.
> * RIP relative instructions are adjusted at copying time in 64 bits mode
> */
> -int can_boost(struct insn *insn, void *addr)
> +bool can_boost(struct insn *insn, void *addr)
> {
> kprobe_opcode_t opcode;
> insn_byte_t prefix;
> int i;
>
> if (search_exception_tables((unsigned long)addr))
> - return 0; /* Page fault may occur on this address. */
> + return false; /* Page fault may occur on this address. */
>
> /* 2nd-byte opcode */
> if (insn->opcode.nbytes == 2)
> @@ -152,7 +152,7 @@ int can_boost(struct insn *insn, void *addr)
> (unsigned long *)twobyte_is_boostable);
>
> if (insn->opcode.nbytes != 1)
> - return 0;
> + return false;
>
> for_each_insn_prefix(insn, i, prefix) {
> insn_attr_t attr;
> @@ -160,7 +160,7 @@ int can_boost(struct insn *insn, void *addr)
> attr = inat_get_opcode_attribute(prefix);
> /* Can't boost Address-size override prefix and CS override prefix */
> if (prefix == 0x2e || inat_is_address_size_prefix(attr))
> - return 0;
> + return false;
> }
>
> opcode = insn->opcode.bytes[0];
> @@ -181,12 +181,12 @@ int can_boost(struct insn *insn, void *addr)
> case 0xf6 ... 0xf7: /* Grp3 */
> case 0xfe: /* Grp4 */
> /* ... are not boostable */
> - return 0;
> + return false;
> case 0xff: /* Grp5 */
> /* Only indirect jmp is boostable */
> return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
> default:
> - return 1;
> + return true;
> }
> }
>
> @@ -253,20 +253,18 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
> }
>
> /* Check if paddr is at an instruction boundary */
> -static int can_probe(unsigned long paddr)
> +static bool can_probe(unsigned long paddr)
> {
> unsigned long addr, __addr, offset = 0;
> struct insn insn;
> kprobe_opcode_t buf[MAX_INSN_SIZE];
>
> if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
> - return 0;
> + return false;
>
> /* Decode instructions */
> addr = paddr - offset;
> while (addr < paddr) {
> - int ret;
> -
> /*
> * Check if the instruction has been modified by another
> * kprobe, in which case we replace the breakpoint by the
> @@ -277,11 +275,10 @@ static int can_probe(unsigned long paddr)
> */
> __addr = recover_probed_instruction(buf, addr);
> if (!__addr)
> - return 0;
> + return false;
>
> - ret = insn_decode_kernel(&insn, (void *)__addr);
> - if (ret < 0)
> - return 0;
> + if (insn_decode_kernel(&insn, (void *)__addr) < 0)
> + return false;
>
> #ifdef CONFIG_KGDB
> /*
> @@ -290,7 +287,7 @@ static int can_probe(unsigned long paddr)
> */
> if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
> kgdb_has_hit_break(addr))
> - return 0;
> + return false;
> #endif
> addr += insn.length;
> }
> @@ -310,10 +307,10 @@ static int can_probe(unsigned long paddr)
> */
> __addr = recover_probed_instruction(buf, addr);
> if (!__addr)
> - return 0;
> + return false;
>
> if (insn_decode_kernel(&insn, (void *)__addr) < 0)
> - return 0;
> + return false;
>
> if (insn.opcode.value == 0xBA)
> offset = 12;
> @@ -324,7 +321,7 @@ static int can_probe(unsigned long paddr)
>
> /* This movl/addl is used for decoding CFI. */
> if (is_cfi_trap(addr + offset))
> - return 0;
> + return false;
> }
>
> out:
> --
> 2.43.0
>


--
Masami Hiramatsu (Google) <[email protected]>

2024-02-05 04:42:27

by Jinghao Jia

Subject: Re: [PATCH v2 3/3] x86/kprobes: Boost more instructions from grp2/3/4/5

On 2/4/24 06:09, Masami Hiramatsu (Google) wrote:
> On Sat, 3 Feb 2024 21:13:00 -0600
> Jinghao Jia <[email protected]> wrote:
>
>> With the instruction decoder, we are now able to decode and recognize
>> instructions with opcode extensions. There are more instructions in
>> these groups that can be boosted:
>>
>> Group 2: ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
>> Group 3: TEST, NOT, NEG, MUL, IMUL, DIV, IDIV
>> Group 4: INC, DEC (byte operation)
>> Group 5: INC, DEC (word/doubleword/quadword operation)
>>
>> These instructions are not boosted previously because there are reserved
>> opcodes within the groups, e.g., group 2 with ModR/M.nnn == 110 is
>> unmapped. As a result, kprobes attached to them requires two int3 traps
>> as being non-boostable also prevents jump-optimization.
>>
>> Some simple tests on QEMU show that after boosting and jump-optimization
>> a single kprobe on these instructions with an empty pre-handler runs 10x
>> faster (~1000 cycles vs. ~100 cycles).
>>
>> Since these instructions are mostly ALU operations and do not touch
>> special registers like RIP, let's boost them so that we get the
>> performance benefit.
>>
>
> This looks good to me. And can you check how many instructions in the
> vmlinux will be covered by this change typically?
>

I collected the stats from the LLVM CodeGen backend on kernel version 6.7.3
using Gentoo's dist-kernel config (with mod2yesconfig applied to make
modules built-in). Here are the numbers of Grp 2/3/4/5 instructions that
are newly covered by this patch:

Kernel total # of insns: 28552017 (from objdump)
Grp2 insns: 286249 (from LLVM)
Grp3 insns: 286556 (from LLVM)
Grp4 insns: 5832 (from LLVM)
Grp5 insns: 146314 (from LLVM)

Note that using LLVM means we miss the stats from inline assembly and
assembly source files.
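
(Summing these up: 286249 + 286556 + 5832 + 146314 = 724951 newly boostable
instructions, i.e. 724951 / 28552017, roughly 2.5% of the decoded
instructions.)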

--Jinghao

> Thank you,
>
>> Signed-off-by: Jinghao Jia <[email protected]>
>> ---
>> arch/x86/kernel/kprobes/core.c | 23 +++++++++++++++++------
>> 1 file changed, 17 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
>> index 7a08d6a486c8..530f6d4b34f4 100644
>> --- a/arch/x86/kernel/kprobes/core.c
>> +++ b/arch/x86/kernel/kprobes/core.c
>> @@ -169,22 +169,33 @@ bool can_boost(struct insn *insn, void *addr)
>> case 0x62: /* bound */
>> case 0x70 ... 0x7f: /* Conditional jumps */
>> case 0x9a: /* Call far */
>> - case 0xc0 ... 0xc1: /* Grp2 */
>> case 0xcc ... 0xce: /* software exceptions */
>> - case 0xd0 ... 0xd3: /* Grp2 */
>> case 0xd6: /* (UD) */
>> case 0xd8 ... 0xdf: /* ESC */
>> case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
>> case 0xe8 ... 0xe9: /* near Call, JMP */
>> case 0xeb: /* Short JMP */
>> case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
>> - case 0xf6 ... 0xf7: /* Grp3 */
>> - case 0xfe: /* Grp4 */
>> /* ... are not boostable */
>> return false;
>> + case 0xc0 ... 0xc1: /* Grp2 */
>> + case 0xd0 ... 0xd3: /* Grp2 */
>> + /*
>> + * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved.
>> + */
>> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110;
>> + case 0xf6 ... 0xf7: /* Grp3 */
>> + /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */
>> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001;
>> + case 0xfe: /* Grp4 */
>> + /* Only INC and DEC are boostable */
>> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
>> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001;
>> case 0xff: /* Grp5 */
>> - /* Only indirect jmp is boostable */
>> - return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
>> + /* Only INC, DEC, and indirect JMP are boostable */
>> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
>> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 ||
>> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100;
>> default:
>> return true;
>> }
>> --
>> 2.43.0
>>
>
>



2024-02-06 23:41:06

by Masami Hiramatsu

Subject: Re: [PATCH v2 3/3] x86/kprobes: Boost more instructions from grp2/3/4/5

On Sun, 4 Feb 2024 22:39:32 -0600
Jinghao Jia <[email protected]> wrote:

> On 2/4/24 06:09, Masami Hiramatsu (Google) wrote:
> > On Sat, 3 Feb 2024 21:13:00 -0600
> > Jinghao Jia <[email protected]> wrote:
> >
> >> With the instruction decoder, we are now able to decode and recognize
> >> instructions with opcode extensions. There are more instructions in
> >> these groups that can be boosted:
> >>
> >> Group 2: ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
> >> Group 3: TEST, NOT, NEG, MUL, IMUL, DIV, IDIV
> >> Group 4: INC, DEC (byte operation)
> >> Group 5: INC, DEC (word/doubleword/quadword operation)
> >>
> >> These instructions are not boosted previously because there are reserved
> >> opcodes within the groups, e.g., group 2 with ModR/M.nnn == 110 is
> >> unmapped. As a result, kprobes attached to them requires two int3 traps
> >> as being non-boostable also prevents jump-optimization.
> >>
> >> Some simple tests on QEMU show that after boosting and jump-optimization
> >> a single kprobe on these instructions with an empty pre-handler runs 10x
> >> faster (~1000 cycles vs. ~100 cycles).
> >>
> >> Since these instructions are mostly ALU operations and do not touch
> >> special registers like RIP, let's boost them so that we get the
> >> performance benefit.
> >>
> >
> > This looks good to me. And can you check how many instructions in the
> > vmlinux will be covered by this change typically?
> >
>
> I collected the stats from the LLVM CodeGen backend on kernel version 6.7.3
> using Gentoo's dist-kernel config (with a mod2yesconfig to make modules
> builtin) and here are the number of Grp 2/3/4/5 instructions that are newly
> covered by this patch:
>
> Kernel total # of insns: 28552017 (from objdump)
> Grp2 insns: 286249 (from LLVM)
> Grp3 insns: 286556 (from LLVM)
> Grp4 insns: 5832 (from LLVM)
> Grp5 insns: 146314 (from LLVM)
>
> Note that using LLVM means we miss the stats from inline assembly and
> assembly source files.

Thanks for checking! So it increases the coverage by ~2.5% :)

Thank you,


>
> --Jinghao
>
> > Thank you,
> >
> >> Signed-off-by: Jinghao Jia <[email protected]>
> >> ---
> >> arch/x86/kernel/kprobes/core.c | 23 +++++++++++++++++------
> >> 1 file changed, 17 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> >> index 7a08d6a486c8..530f6d4b34f4 100644
> >> --- a/arch/x86/kernel/kprobes/core.c
> >> +++ b/arch/x86/kernel/kprobes/core.c
> >> @@ -169,22 +169,33 @@ bool can_boost(struct insn *insn, void *addr)
> >> case 0x62: /* bound */
> >> case 0x70 ... 0x7f: /* Conditional jumps */
> >> case 0x9a: /* Call far */
> >> - case 0xc0 ... 0xc1: /* Grp2 */
> >> case 0xcc ... 0xce: /* software exceptions */
> >> - case 0xd0 ... 0xd3: /* Grp2 */
> >> case 0xd6: /* (UD) */
> >> case 0xd8 ... 0xdf: /* ESC */
> >> case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
> >> case 0xe8 ... 0xe9: /* near Call, JMP */
> >> case 0xeb: /* Short JMP */
> >> case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
> >> - case 0xf6 ... 0xf7: /* Grp3 */
> >> - case 0xfe: /* Grp4 */
> >> /* ... are not boostable */
> >> return false;
> >> + case 0xc0 ... 0xc1: /* Grp2 */
> >> + case 0xd0 ... 0xd3: /* Grp2 */
> >> + /*
> >> + * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved.
> >> + */
> >> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110;
> >> + case 0xf6 ... 0xf7: /* Grp3 */
> >> + /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */
> >> + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001;
> >> + case 0xfe: /* Grp4 */
> >> + /* Only INC and DEC are boostable */
> >> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
> >> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001;
> >> case 0xff: /* Grp5 */
> >> - /* Only indirect jmp is boostable */
> >> - return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
> >> + /* Only INC, DEC, and indirect JMP are boostable */
> >> + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
> >> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 ||
> >> + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100;
> >> default:
> >> return true;
> >> }
> >> --
> >> 2.43.0
> >>
> >
> >


--
Masami Hiramatsu (Google) <[email protected]>