Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933174AbbLGVwd (ORCPT ); Mon, 7 Dec 2015 16:52:33 -0500 Received: from mail.kernel.org ([198.145.29.136]:47548 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932886AbbLGVwB (ORCPT ); Mon, 7 Dec 2015 16:52:01 -0500 From: Andy Lutomirski To: x86@kernel.org Cc: linux-kernel@vger.kernel.org, Brian Gerst , Borislav Petkov , =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= , Denys Vlasenko , Linus Torvalds , Andy Lutomirski Subject: [PATCH 12/12] x86/entry: Do enter_from_user_mode with IRQs off Date: Mon, 7 Dec 2015 13:51:37 -0800 Message-Id: X-Mailer: git-send-email 2.5.0 In-Reply-To: References: In-Reply-To: References: Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4608 Lines: 137 Now that slow-path syscalls always enter C before enabling interrupts, it's straightforward to do enter_from_user_mode before enabling interrupts rather than doing it as part of entry tracing. With this change, we should finally be able to retire exception_enter. This will also enable optimizations based on knowing that we never change context tracking state with interrupts on. Signed-off-by: Andy Lutomirski --- arch/x86/entry/common.c | 39 ++++++++++++++------------------------ arch/x86/include/asm/thread_info.h | 5 ++++- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b8a848f80b2a..016ac47c954b 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -37,14 +37,17 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) return (struct thread_info *)(top_of_stack - THREAD_SIZE); } -#ifdef CONFIG_CONTEXT_TRACKING +#ifndef CONFIG_CONTEXT_TRACKING +static +#else +__visible +#endif /* Called on entry from user mode with IRQs off. */ -__visible void enter_from_user_mode(void) +void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit(); } -#endif static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) { @@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; -#ifdef CONFIG_CONTEXT_TRACKING - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - enter_from_user_mode(); - work &= ~_TIF_NOHZ; - } -#endif - #ifdef CONFIG_SECCOMP /* * Do seccomp first -- it should minimize exposure of other @@ -350,6 +342,7 @@ __visible void do_syscall_64(struct pt_regs *regs) struct thread_info *ti = pt_regs_to_thread_info(regs); unsigned long nr = regs->orig_ax; + enter_from_user_mode(); local_irq_enable(); if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) @@ -372,9 +365,9 @@ __visible void do_syscall_64(struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) /* - * Does a 32-bit syscall. Called with IRQs on and does all entry and - * exit work and returns with IRQs off. This function is extremely hot - * in workloads that use it, and it's usually called from + * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does + * all entry and exit work and returns with IRQs off. This function is + * extremely hot in workloads that use it, and it's usually called from * do_fast_syscall_32, so forcibly inline it to improve performance. */ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) @@ -415,6 +408,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) /* Handles int $0x80 */ __visible void do_int80_syscall_32(struct pt_regs *regs) { + enter_from_user_mode(); local_irq_enable(); do_syscall_32_irqs_on(regs); } @@ -437,11 +431,9 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) */ regs->ip = landing_pad; - /* - * Fetch ECX from where the vDSO stashed it. - * - * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! - */ + enter_from_user_mode(); + + /* Fetch ECX from where the vDSO stashed it. */ local_irq_enable(); if ( #ifdef CONFIG_X86_64 @@ -460,9 +452,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. */ local_irq_disable(); regs->ax = -EFAULT; -#ifdef CONFIG_CONTEXT_TRACKING - enter_from_user_mode(); -#endif prepare_exit_to_usermode(regs); return 0; /* Keep it simple: use IRET. */ } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1ecd214d227..ae210d6159d3 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -136,7 +136,10 @@ struct thread_info { #define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_X32 (1 << TIF_X32) -/* work to do in syscall_trace_enter() */ +/* + * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for + * enter_from_user_mode() + */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/