From: Masami Hiramatsu
Subject: [PATCH v2 -tip] [BUGFIX] x86/kprobes: Fix to recover instructions on optimized path
To: Ingo Molnar, linux-kernel@vger.kernel.org
Cc: yrl.pp-manager.tt@hitachi.com, Masami Hiramatsu, Thomas Gleixner,
    Ingo Molnar, "H. Peter Anvin", Ananth N Mavinakayanahalli
Date: Tue, 21 Feb 2012 17:36:21 +0900
Message-ID: <20120221083621.11350.4716.stgit@localhost.localdomain>
User-Agent: StGIT/0.14.3

The current probed-instruction recovery expects that only the breakpoint
instruction modifies the probed instruction. However, since kprobes jump
optimization can replace original instructions with a relative jump, that
expectation is not enough, and instruction decoding can fail on a function
where an optimized probe already exists.

This bug can be reproduced easily as below:

1) Find a target function address (any kprobe-able function is OK):

 $ grep __secure_computing /proc/kallsyms
 ffffffff810c19d0 T __secure_computing

2) Decode the function:

 $ objdump -d vmlinux --start-address=0xffffffff810c19d0 --stop-address=0xffffffff810c19eb

 vmlinux:     file format elf64-x86-64

 Disassembly of section .text:

 ffffffff810c19d0 <__secure_computing>:
 ffffffff810c19d0:	55                   	push   %rbp
 ffffffff810c19d1:	48 89 e5             	mov    %rsp,%rbp
 ffffffff810c19d4:	e8 67 8f 72 00       	callq  ffffffff817ea940
 ffffffff810c19d9:	65 48 8b 04 25 40 b8 	mov    %gs:0xb840,%rax
 ffffffff810c19e0:	00 00
 ffffffff810c19e2:	83 b8 88 05 00 00 01 	cmpl   $0x1,0x588(%rax)
 ffffffff810c19e9:	74 05                	je     ffffffff810c19f0 <__secure_computing+0x20>

3) Put a kprobe-event at an optimizable place, where no call/jump
   instruction falls within the 5 bytes that the jump will replace:

 $ su -
 # cd /sys/kernel/debug/tracing
 # echo p __secure_computing+0x9 > kprobe_events

4) Enable it and check that it is optimized:

 # echo 1 > events/kprobes/p___secure_computing_9/enable
 # cat ../kprobes/list
 ffffffff810c19d9  k  __secure_computing+0x9    [OPTIMIZED]

5) Put another kprobe on an instruction after the previous probe in
   the same function:

 # echo p __secure_computing+0x12 >> kprobe_events
 bash: echo: write error: Invalid argument
 # dmesg | tail -n 1
 [ 1666.500016] Probing address(0xffffffff810c19e2) is not an instruction boundary.

6) However, if kprobes optimization is disabled, it works:

 # echo 0 > /proc/sys/debug/kprobes-optimization
 # cat ../kprobes/list
 ffffffff810c19d9  k  __secure_computing+0x9
 # echo p __secure_computing+0x12 >> kprobe_events
 (no error)

This is because, when checking for an instruction boundary, kprobes does
not recover an instruction that has been overwritten with a relative jump
by another (optimized) kprobe; it only recovers the breakpoint
instruction. This patch fixes kprobes to recover such instructions as
well.
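For background, here is a minimal user-space sketch of the two ways a
probe rewrites the instruction stream (illustrative only, not part of
the patch; the helper names are made up, and the saved-byte layout
merely mirrors kp->opcode and op->optinsn.copied_insn). A regular
kprobe clobbers a single byte with int3, which the old recovery path
already handles; an optimized kprobe clobbers a full 5 bytes with
jmp rel32, so up to 4 bytes beyond the probe point, possibly spanning
later instruction boundaries, change as well:

	#include <stdint.h>
	#include <string.h>

	#define BREAKPOINT_INSN	0xcc	/* int3 */
	#define RELJUMP_OPCODE	0xe9	/* jmp rel32 */

	/* Regular kprobe: only byte 0 changes; restoring the saved
	 * opcode is enough to rebuild the original instruction. */
	static void plant_breakpoint(uint8_t *insn, uint8_t *saved_opcode)
	{
		*saved_opcode = insn[0];
		insn[0] = BREAKPOINT_INSN;
	}

	/* Optimized kprobe: 5 bytes change.  The rel32 operand is
	 * dest - (src + 5), i.e. relative to the next instruction, so
	 * the 4 bytes after the opcode are lost unless saved first. */
	static void plant_reljump(uint8_t *insn, uintptr_t src,
				  uintptr_t dest, uint8_t saved_bytes[4])
	{
		int32_t disp = (int32_t)(dest - (src + 5));

		memcpy(saved_bytes, insn + 1, 4);	/* save originals */
		insn[0] = RELJUMP_OPCODE;
		memcpy(insn + 1, &disp, 4);		/* little-endian rel32 */
	}

A decoder that lands anywhere in bytes 1-4 of such a jump sees
displacement bytes instead of original instructions, which is exactly
the decoding failure shown in step 5 above.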
With this fix:

 # echo p __secure_computing+0x9 > kprobe_events
 # echo 1 > events/kprobes/p___secure_computing_9/enable
 # cat ../kprobes/list
 ffffffff810c1aa9  k  __secure_computing+0x9    [OPTIMIZED]
 # echo p __secure_computing+0x12 >> kprobe_events
 # cat ../kprobes/list
 ffffffff810c1aa9  k  __secure_computing+0x9    [OPTIMIZED]
 ffffffff810c1ab2  k  __secure_computing+0x12   [DISABLED]

Changes in v2:
 - Fix a bug in recovering the original instruction address in the
   RIP-relative instruction fixup.
 - Rebased onto tip/master.

Signed-off-by: Masami Hiramatsu
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: Ananth N Mavinakayanahalli
---

 arch/x86/kernel/kprobes.c |  115 +++++++++++++++++++++++++++------------------
 include/linux/kprobes.h   |    6 ++
 kernel/kprobes.c          |    2 -
 3 files changed, 77 insertions(+), 46 deletions(-)

(A simplified user-space model of the recovery index arithmetic follows
after the patch below.)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d..e91aa75 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -207,13 +207,29 @@ retry:
 	}
 }
 
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+/*
+ * Recover the probed instruction at addr for further analysis.
+ * The caller must lock kprobes via kprobe_mutex, or disable preemption,
+ * to prevent the kprobes it references from being released.
+ */
+static unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+						unsigned long addr)
 {
 	struct kprobe *kp;
-	kp = get_kprobe((void *)addr);
-	if (!kp)
-		return -EINVAL;
+	int i;
+
+	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+		kp = get_kprobe((void *)addr - i);
+		if (kp)
+			goto found;
+	}
+
+	/* There are no probes; return the original address */
+	return addr;
+
+found:
+	if (i != 0 && !kprobe_optready(kp))
+		return addr;	/* this probe doesn't affect the instruction */
 
 	/*
 	 * Basically, kp->ainsn.insn has an original instruction.
@@ -229,15 +245,33 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 	 * from it and kp->opcode.
 	 */
 	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-	buf[0] = kp->opcode;
-	return 0;
+	if (i == 0)
+		buf[0] = kp->opcode;
+
+	if (kprobe_optready(kp)) {
+		/*
+		 * If the kprobe is optimizable, the original bytes may have
+		 * been overwritten by the jump destination address. In that
+		 * case, the original bytes must be recovered from the
+		 * op->optinsn.copied_insn buffer.
+		 */
+		struct optimized_kprobe *op;
+		op = container_of(kp, struct optimized_kprobe, kp);
+		if (i == 0)
+			memcpy(buf + 1, op->optinsn.copied_insn,
+			       RELATIVE_ADDR_SIZE);
+		else
+			memcpy(buf, op->optinsn.copied_insn + i - 1,
+			       RELATIVE_ADDR_SIZE - i + 1);
+	}
+
+	return (unsigned long)buf;
 }
 
 /* Check if paddr is at an instruction boundary */
 static int __kprobes can_probe(unsigned long paddr)
 {
-	int ret;
-	unsigned long addr, offset = 0;
+	unsigned long addr, __addr, offset = 0;
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
@@ -247,26 +281,24 @@ static int __kprobes can_probe(unsigned long paddr)
 	/* Decode instructions */
 	addr = paddr - offset;
 	while (addr < paddr) {
-		kernel_insn_init(&insn, (void *)addr);
-		insn_get_opcode(&insn);
-
 		/*
 		 * Check if the instruction has been modified by another
 		 * kprobe, in which case we replace the breakpoint by the
 		 * original instruction in our buffer.
+		 * Also, jump optimization will change the breakpoint to a
+		 * relative jump. Since the relative jump is itself a normal
+		 * instruction, we just go through if there is no kprobe.
 		 */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf, addr);
-			if (ret)
-				/*
-				 * Another debugging subsystem might insert
-				 * this breakpoint. In that case, we can't
-				 * recover it.
-				 */
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
+		__addr = recover_probed_instruction(buf, addr);
+		kernel_insn_init(&insn, (void *)__addr);
 		insn_get_length(&insn);
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+			/*
+			 * Another debugging subsystem might insert
+			 * this breakpoint. In that case, we can't
+			 * recover it.
+			 */
+			return 0;
 		addr += insn.length;
 	}
 
@@ -302,21 +334,17 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 {
 	struct insn insn;
-	int ret;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	u8 *orig_src = src;	/* Back up original src for RIP calculation */
+
+	if (recover)
+		src = (u8 *)recover_probed_instruction(buf, (unsigned long)src);
 
 	kernel_insn_init(&insn, src);
-	if (recover) {
-		insn_get_opcode(&insn);
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf,
-							 (unsigned long)src);
-			if (ret)
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
-	}
 	insn_get_length(&insn);
+	/* Another subsystem has put a breakpoint; we failed to recover it */
+	if (recover && insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		return 0;
 
 	memcpy(dest, insn.kaddr, insn.length);
 
 #ifdef CONFIG_X86_64
@@ -337,7 +365,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 		 * extension of the original signed 32-bit displacement would
 		 * have given.
 		 */
-		newdisp = (u8 *) src + (s64) insn.displacement.value -
+		newdisp = (u8 *) orig_src + (s64) insn.displacement.value -
 			  (u8 *) dest;
 		BUG_ON((s64) (s32) newdisp != newdisp);	/* Sanity check.  */
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
@@ -1271,8 +1299,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
 /* Decode whole function to ensure any instructions don't jump into target */
 static int __kprobes can_optimize(unsigned long paddr)
 {
-	int ret;
-	unsigned long addr, size = 0, offset = 0;
+	unsigned long addr, __addr, size = 0, offset = 0;
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
@@ -1301,15 +1328,12 @@ static int __kprobes can_optimize(unsigned long paddr)
 			 * we can't optimize kprobe in this function.
 			 */
 			return 0;
-		kernel_insn_init(&insn, (void *)addr);
-		insn_get_opcode(&insn);
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf, addr);
-			if (ret)
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
+		__addr = recover_probed_instruction(buf, addr);
+		kernel_insn_init(&insn, (void *)__addr);
 		insn_get_length(&insn);
+		/* Another subsystem has put a breakpoint */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+			return 0;
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
@@ -1366,6 +1390,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 /*
  * Copy replacing target instructions
  * Target instructions MUST be relocatable (checked inside)
+ * This is called when a new aggr(opt)probe is allocated or reused.
  */
 int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 {
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index dce6e4d..6abec49 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -293,6 +293,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 					     size_t *length, loff_t *ppos);
 #endif
 
+extern int kprobe_optready(struct kprobe *p);
+#else /* CONFIG_OPTPROBES */
+static inline int kprobe_optready(struct kprobe *p)
+{
+	return 0;
+}
 #endif /* CONFIG_OPTPROBES */
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9788c0e..c52c68b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -403,7 +403,7 @@ static __kprobes void free_aggr_kprobe(struct kprobe *p)
 }
 
 /* Return true(!0) if the kprobe is ready for optimization. */
-static inline int kprobe_optready(struct kprobe *p)
+int kprobe_optready(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
--
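As noted below the diffstat, here is a minimal user-space model of the
index arithmetic in recover_probed_instruction (an editorial sketch, not
kernel code: the struct is a stand-in for kp/op, and it simplifies by
taking the tail bytes directly from the probed address):

	#include <stdint.h>
	#include <string.h>

	#define RELATIVEJUMP_SIZE	5	/* 0xe9 + rel32 */
	#define RELATIVE_ADDR_SIZE	4	/* rel32 operand */
	#define MAX_INSN_SIZE		16

	/* Stand-in for an optimized kprobe's saved state. */
	struct opt_probe_model {
		const uint8_t *addr;	/* kp->addr: where the jump starts */
		uint8_t opcode;		/* original first byte (kp->opcode) */
		/* original bytes at addr+1 .. addr+4 */
		uint8_t copied_insn[RELATIVE_ADDR_SIZE];
	};

	/*
	 * Rebuild the original bytes at op->addr + i, 0 <= i < 5.
	 * i == 0: byte 0 comes from the saved opcode, bytes 1..4 from
	 *         copied_insn.
	 * i >  0: the address points into the jump's displacement; the
	 *         remaining 5 - i jump bytes come from copied_insn + i - 1,
	 *         and everything past the jump is unmodified memory.
	 */
	static void model_recover(uint8_t buf[MAX_INSN_SIZE],
				  const struct opt_probe_model *op, int i)
	{
		const uint8_t *addr = op->addr + i;

		memcpy(buf, addr, MAX_INSN_SIZE);	/* live bytes, jump included */
		if (i == 0) {
			buf[0] = op->opcode;
			memcpy(buf + 1, op->copied_insn, RELATIVE_ADDR_SIZE);
		} else {
			memcpy(buf, op->copied_insn + i - 1,
			       RELATIVE_ADDR_SIZE - i + 1);
		}
	}

This is why can_probe(), __copy_instruction(), and can_optimize() can
now decode from the buffer returned by recover_probed_instruction(),
whether the scanned address holds a plain breakpoint, the head of an
optimized jump, or one of its displacement bytes.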