Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753203AbaGUFmn (ORCPT ); Mon, 21 Jul 2014 01:42:43 -0400 Received: from mga09.intel.com ([134.134.136.24]:61456 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753061AbaGUFml (ORCPT ); Mon, 21 Jul 2014 01:42:41 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.01,698,1400050800"; d="scan'208";a="576270867" From: Qiaowei Ren To: "H. Peter Anvin" , Thomas Gleixner , Ingo Molnar , Dave Hansen Cc: x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Qiaowei Ren Subject: [PATCH v7 07/10] x86, mpx: decode MPX instruction to get bound violation information Date: Mon, 21 Jul 2014 13:38:41 +0800 Message-Id: <1405921124-4230-8-git-send-email-qiaowei.ren@intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1405921124-4230-1-git-send-email-qiaowei.ren@intel.com> References: <1405921124-4230-1-git-send-email-qiaowei.ren@intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org This patch sets bound violation fields of siginfo struct in #BR exception handler by decoding the user instruction and constructing the faulting pointer. This patch doesn't use the generic decoder, and implements a limited special-purpose decoder to decode MPX instructions, simply because the generic decoder is very heavyweight not just in terms of performance but in terms of interface -- because it has to. 
Signed-off-by: Qiaowei Ren --- arch/x86/include/asm/mpx.h | 23 ++++ arch/x86/kernel/mpx.c | 299 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 6 + 3 files changed, 328 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index b7598ac..780af63 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef CONFIG_X86_64 @@ -44,15 +45,37 @@ #define MPX_BNDSTA_ERROR_CODE 0x3 #define MPX_BD_ENTRY_VALID_FLAG 0x1 +struct mpx_insn { + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + + unsigned char addr_bytes; /* effective address size */ + unsigned char limit; + unsigned char x86_64; + + const unsigned char *kaddr; /* kernel address of insn to analyze */ + const unsigned char *next_byte; +}; + +#define MAX_MPX_INSN_SIZE 15 + unsigned long mpx_mmap(unsigned long len); #ifdef CONFIG_X86_INTEL_MPX int do_mpx_bt_fault(struct xsave_struct *xsave_buf); +void do_mpx_bounds(struct pt_regs *regs, siginfo_t *info, + struct xsave_struct *xsave_buf); #else static inline int do_mpx_bt_fault(struct xsave_struct *xsave_buf) { return -EINVAL; } +static inline void do_mpx_bounds(struct pt_regs *regs, siginfo_t *info, + struct xsave_struct *xsave_buf) +{ +} #endif /* CONFIG_X86_INTEL_MPX */ #endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/kernel/mpx.c b/arch/x86/kernel/mpx.c index f02dcea..c1957a8 100644 --- a/arch/x86/kernel/mpx.c +++ b/arch/x86/kernel/mpx.c @@ -2,6 +2,275 @@ #include #include +enum reg_type { + REG_TYPE_RM = 0, + REG_TYPE_INDEX, + REG_TYPE_BASE, +}; + +static unsigned long get_reg(struct mpx_insn *insn, struct pt_regs *regs, + enum reg_type type) +{ + int regno = 0; + unsigned char modrm = (unsigned char)insn->modrm.value; + unsigned char sib = (unsigned char)insn->sib.value; + + static const int regoff[] = { + offsetof(struct pt_regs, ax), + 
offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), +#ifdef CONFIG_X86_64 + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), +#endif + }; + + switch (type) { + case REG_TYPE_RM: + regno = X86_MODRM_RM(modrm); + if (X86_REX_B(insn->rex_prefix.value) == 1) + regno += 8; + break; + + case REG_TYPE_INDEX: + regno = X86_SIB_INDEX(sib); + if (X86_REX_X(insn->rex_prefix.value) == 1) + regno += 8; + break; + + case REG_TYPE_BASE: + regno = X86_SIB_BASE(sib); + if (X86_REX_B(insn->rex_prefix.value) == 1) + regno += 8; + break; + + default: + break; + } + + return regs_get_register(regs, regoff[regno]); +} + +/* + * return the address being referenced by the instruction + * for mod=3 returning the content of the rm reg + * for mod!=3 calculates the address using SIB and Disp + */ +static unsigned long get_addr_ref(struct mpx_insn *insn, struct pt_regs *regs) +{ + unsigned long addr; + unsigned long base; + unsigned long indx; + unsigned char modrm = (unsigned char)insn->modrm.value; + unsigned char sib = (unsigned char)insn->sib.value; + + if (X86_MODRM_MOD(modrm) == 3) { + addr = get_reg(insn, regs, REG_TYPE_RM); + } else { + if (insn->sib.nbytes) { + base = get_reg(insn, regs, REG_TYPE_BASE); + indx = get_reg(insn, regs, REG_TYPE_INDEX); + addr = base + indx * (1 << X86_SIB_SCALE(sib)); + } else { + addr = get_reg(insn, regs, REG_TYPE_RM); + } + addr += insn->displacement.value; + } + + return addr; +} + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= (insn)->limit) + +#define __get_next(t, insn) 
\ +({ \ + t r = *(t *)insn->next_byte; \ + insn->next_byte += sizeof(t); \ + r; \ +}) + +#define __peek_next(t, insn) \ +({ \ + t r = *(t *)insn->next_byte; \ + r; \ +}) + +#define get_next(t, insn) \ +({ \ + if (unlikely(!validate_next(t, insn, 0))) \ + goto err_out; \ + __get_next(t, insn); \ +}) + +#define peek_next(t, insn) \ +({ \ + if (unlikely(!validate_next(t, insn, 0))) \ + goto err_out; \ + __peek_next(t, insn); \ +}) + +static void mpx_insn_get_prefixes(struct mpx_insn *insn) +{ + unsigned char b; + + /* Decode legacy prefix and REX prefix */ + b = peek_next(unsigned char, insn); + while (b != 0x0f) { + /* + * look for a rex prefix + * a REX prefix cannot be followed by a legacy prefix. + */ + if (insn->x86_64 && ((b&0xf0) == 0x40)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + break; + } + + /* check the other legacy prefixes */ + switch (b) { + case 0xf2: + case 0xf3: + case 0xf0: + case 0x64: + case 0x65: + case 0x2e: + case 0x3e: + case 0x26: + case 0x36: + case 0x66: + case 0x67: + insn->next_byte++; + break; + default: /* everything else is garbage */ + goto err_out; + } + b = peek_next(unsigned char, insn); + } + +err_out: + return; +} + +static void mpx_insn_get_modrm(struct mpx_insn *insn) +{ + insn->modrm.value = get_next(unsigned char, insn); + insn->modrm.nbytes = 1; + +err_out: + return; +} + +static void mpx_insn_get_sib(struct mpx_insn *insn) +{ + unsigned char modrm = (unsigned char)insn->modrm.value; + + if (X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(unsigned char, insn); + insn->sib.nbytes = 1; + } + +err_out: + return; +} + +static void mpx_insn_get_displacement(struct mpx_insn *insn) +{ + unsigned char mod, rm, base; + + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes + * mod = 11 - no memory operand + * + * mod != 11, r/m = 
100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + return; + if (mod == 1) { + insn->displacement.value = get_next(unsigned char, insn); + insn->displacement.nbytes = 1; + } else if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + +err_out: + return; +} + +static void mpx_insn_init(struct mpx_insn *insn, struct pt_regs *regs) +{ + unsigned char buf[MAX_MPX_INSN_SIZE]; + int bytes; + + memset(insn, 0, sizeof(*insn)); + + bytes = copy_from_user(buf, (void __user *)regs->ip, MAX_MPX_INSN_SIZE); + insn->limit = MAX_MPX_INSN_SIZE - bytes; + insn->kaddr = buf; + insn->next_byte = buf; + + /* + * In 64-bit Mode, all Intel MPX instructions use 64-bit + * operands for bounds and 64 bit addressing, i.e. REX.W & + * 67H have no effect on data or address size. + * + * In compatibility and legacy modes (including 16-bit code + * segments, real and virtual 8086 modes) all Intel MPX + * instructions use 32-bit operands for bounds and 32 bit + * addressing. + */ +#ifdef CONFIG_X86_64 + insn->x86_64 = 1; + insn->addr_bytes = 8; +#else + insn->x86_64 = 0; + insn->addr_bytes = 4; +#endif +} + +static unsigned long mpx_insn_decode(struct mpx_insn *insn, + struct pt_regs *regs) +{ + mpx_insn_init(insn, regs); + + /* + * In this case, we only need decode bndcl/bndcn/bndcu, + * so we can use private disassembly interfaces to get + * prefixes, modrm, sib, displacement, etc. 
+ */ + mpx_insn_get_prefixes(insn); + insn->next_byte += 2; /* ignore opcode */ + mpx_insn_get_modrm(insn); + mpx_insn_get_sib(insn); + mpx_insn_get_displacement(insn); + + return get_addr_ref(insn, regs); +} + static int allocate_bt(long __user *bd_entry) { unsigned long bt_addr, old_val = 0; @@ -58,3 +327,33 @@ int do_mpx_bt_fault(struct xsave_struct *xsave_buf) return allocate_bt((long __user *)bd_entry); } + +/* + * If a bounds overflow occurs then a #BR is generated. The fault + * handler will decode MPX instructions to get violation address + * and set this address into extended struct siginfo. + */ +void do_mpx_bounds(struct pt_regs *regs, siginfo_t *info, + struct xsave_struct *xsave_buf) +{ + struct mpx_insn insn; + uint8_t bndregno; + unsigned long addr_vio; + + addr_vio = mpx_insn_decode(&insn, regs); + + bndregno = X86_MODRM_REG(insn.modrm.value); + if (bndregno > 3) + return; + + /* Note: the upper 32 bits are ignored in 32-bit mode. */ + info->si_lower = (void __user *)(unsigned long) + (xsave_buf->bndregs.bndregs[2*bndregno]); + info->si_upper = (void __user *)(unsigned long) + (~xsave_buf->bndregs.bndregs[2*bndregno+1]); + info->si_addr_lsb = 0; + info->si_signo = SIGSEGV; + info->si_errno = 0; + info->si_code = SEGV_BNDERR; + info->si_addr = (void __user *)addr_vio; +} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 396a88b..93ce924 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -284,6 +284,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) unsigned long status; struct xsave_struct *xsave_buf; struct task_struct *tsk = current; + siginfo_t info; prev_state = exception_enter(); if (notify_die(DIE_TRAP, "bounds", regs, error_code, @@ -319,6 +320,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) break; case 1: /* Bound violation. 
*/ + do_mpx_bounds(regs, &info, xsave_buf); + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, + error_code, &info); + break; + case 0: /* No exception caused by Intel MPX operations. */ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); break; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/