Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753864AbbBXSwa (ORCPT );
	Tue, 24 Feb 2015 13:52:30 -0500
Received: from mx1.redhat.com ([209.132.183.28]:34155 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1753817AbbBXSw2 (ORCPT );
	Tue, 24 Feb 2015 13:52:28 -0500
From: Denys Vlasenko
To: Andy Lutomirski
Cc: Denys Vlasenko,
	Linus Torvalds,
	Steven Rostedt,
	Ingo Molnar,
	Borislav Petkov,
	"H. Peter Anvin",
	Oleg Nesterov,
	Frederic Weisbecker,
	Alexei Starovoitov,
	Will Drewry,
	Kees Cook,
	x86@kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 4/4] x86: save user %rsp in pt_regs->sp
Date: Tue, 24 Feb 2015 19:51:35 +0100
Message-Id: <1424803895-4420-4-git-send-email-dvlasenk@redhat.com>
In-Reply-To: <1424803895-4420-1-git-send-email-dvlasenk@redhat.com>
References: <1424803895-4420-1-git-send-email-dvlasenk@redhat.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org
Content-Length: 7823
Lines: 225

PER_CPU(old_rsp) usage is simplified: it is now used only as temporary
storage, and the user space stack pointer is stored in pt_regs->sp
immediately on syscall entry, instead of being read back from old_rsp
later, on syscall exit. This allows us to get rid of thread_struct::usersp.

The lazy store of pt_regs->cs and pt_regs->ss is retained - tests have
shown that these stores do cost ~2 cycles on the fast path.

Signed-off-by: Denys Vlasenko
CC: Linus Torvalds
CC: Steven Rostedt
CC: Ingo Molnar
CC: Borislav Petkov
CC: "H. Peter Anvin"
CC: Andy Lutomirski
CC: Oleg Nesterov
CC: Frederic Weisbecker
CC: Alexei Starovoitov
CC: Will Drewry
CC: Kees Cook
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/compat.h    |  2 +-
 arch/x86/include/asm/processor.h |  6 ------
 arch/x86/include/asm/ptrace.h    |  8 ++------
 arch/x86/kernel/entry_64.S       | 22 +++++++++-------------
 arch/x86/kernel/perf_regs.c      |  2 +-
 arch/x86/kernel/process_64.c     |  8 +-------
 6 files changed, 14 insertions(+), 34 deletions(-)
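
(Reviewer note, not part of the patch: with pt_regs->sp now valid even on
the SYSCALL64 fast path, every consumer can read the user stack pointer the
same way. Below is a minimal kernel-style sketch of that uniform access
pattern; the helper name is made up for illustration, but its body is
exactly what KSTK_ESP() and current_user_stack_pointer() reduce to after
this patch.)

#include <linux/sched.h>	/* struct task_struct */
#include <asm/processor.h>	/* task_pt_regs() */
#include <asm/ptrace.h>		/* struct pt_regs */

/* Hypothetical helper, for illustration only. */
static inline unsigned long task_user_sp(struct task_struct *task)
{
	/*
	 * task_pt_regs() points at the register frame saved on kernel
	 * entry; ->sp is the user %rsp and is meaningful when @task is
	 * stopped inside the kernel.
	 */
	return task_pt_regs(task)->sp;
}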
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c40..acdee09 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = this_cpu_read(old_rsp) - 128;
+		sp = task_pt_regs(current)->sp - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a092a0c..ce4aadf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -474,7 +474,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
 	unsigned long		sysenter_cs;
 #else
-	unsigned long		usersp;	/* Copy from PDA */
 	unsigned short		es;
 	unsigned short		ds;
 	unsigned short		fsindex;
@@ -935,11 +934,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 4077d96..74bb2e0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -145,12 +145,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
-#define current_user_stack_pointer()	this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer()	\
-	(test_thread_flag(TIF_IA32)	\
-	 ? current_pt_regs()->sp	\
-	 : this_cpu_read(old_rsp))
+#define current_user_stack_pointer()	current_pt_regs()->sp
+#define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2fd9349..c63a582 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -128,8 +128,6 @@ ENDPROC(native_usergs_sysret64)
  * manipulation.
  */
 	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq PER_CPU_VAR(old_rsp),\tmp
-	movq \tmp,RSP+\offset(%rsp)
 	movq $__USER_DS,SS+\offset(%rsp)
 	movq $__USER_CS,CS+\offset(%rsp)
 	movq RIP+\offset(%rsp),\tmp  /* get rip */
@@ -139,8 +137,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 	.macro RESTORE_TOP_OF_STACK tmp offset=0
-	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,PER_CPU_VAR(old_rsp)
+	/* nothing to do */
 	.endm
 
 /*
@@ -222,9 +219,6 @@ ENDPROC(native_usergs_sysret64)
  * Interrupts are off on entry.
  * Only called from user space.
  *
- * XXX	if we had a free scratch register we could save the RSP into the stack frame
- *      and report it properly in ps. Unfortunately we haven't.
- *
  * When user can change the frames always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
@@ -253,11 +247,13 @@ GLOBAL(system_call_after_swapgs)
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
+	movq	%rcx,RIP(%rsp)
+	movq	%r11,EFLAGS(%rsp)
+	movq	PER_CPU_VAR(old_rsp),%rcx
+	movq	%rcx,RSP(%rsp)
+	movq_cfi rax,ORIG_RAX
 	SAVE_C_REGS_EXCEPT_RAX_RCX_R11
 	movq	$-ENOSYS,RAX(%rsp)
-	movq_cfi rax,ORIG_RAX
-	movq	%r11,EFLAGS(%rsp)
-	movq	%rcx,RIP(%rsp)
 	CFI_REL_OFFSET rip,RIP
 	testl	$_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz	tracesys
@@ -293,7 +289,7 @@ ret_from_sys_call:
 	CFI_REGISTER	rip,rcx
 	movq	EFLAGS(%rsp),%r11
 	/*CFI_REGISTER	rflags,r11*/
-	movq	PER_CPU_VAR(old_rsp), %rsp
+	movq	RSP(%rsp),%rsp
 	/*
 	 * 64bit SYSRET restores rip from rcx,
 	 * rflags from r11 (but RF and VM bits are forced to 0),
@@ -307,7 +303,7 @@ int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11
 	jmp int_ret_from_sys_call
 
-	/* Do syscall tracing */
+	/* Do syscall entry tracing */
 tracesys:
 	movq %rsp, %rdi
 	movl $AUDIT_ARCH_X86_64, %esi
@@ -346,7 +342,7 @@ tracesys_phase2:
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 781861c..02a8720 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -177,7 +177,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 	 * than just blindly copying user_regs.
 	 */
 	regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
-	regs_user_copy->sp = this_cpu_read(old_rsp);
+	regs_user_copy->sp = user_regs->sp;
 	regs_user_copy->cs = __USER_CS;
 	regs_user_copy->ss = __USER_DS;
 	regs_user_copy->cx = -1; /* usually contains garbage */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 975d342..ab79139 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.usersp = me->thread.usersp;
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -235,10 +234,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
-	current->thread.usersp	= new_sp;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	this_cpu_write(old_rsp, new_sp);
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
@@ -401,8 +398,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = this_cpu_read(old_rsp);
-	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
 	/*
@@ -601,6 +596,5 @@ long sys_arch_prctl(int code, unsigned long addr)
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
-	return (test_tsk_thread_flag(task, TIF_IA32)) ?
-		(task_pt_regs(task)->sp) : ((task)->thread.usersp);
+	return task_pt_regs(task)->sp;
 }
-- 
1.8.1.4
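
(A second reviewer note, also not part of the patch: the compat.h hunk keeps
the existing arch_compat_alloc_user_space() allocation math and only changes
where sp is read from. For anyone who wants to sanity-check that arithmetic,
here is a small stand-alone user-space sketch of it, using made-up example
values for sp and len.)

#include <stdio.h>
#include <stdint.h>

/* Same rounding the kernel's round_down() does for power-of-two alignment. */
static uint64_t round_down(uint64_t x, uint64_t align)
{
	return x & ~(align - 1);
}

int main(void)
{
	uint64_t sp  = 0x7fffffffe370ULL;	/* pretend pt_regs->sp */
	uint64_t len = 200;			/* bytes requested */

	/* x32 ABI: step over the 128-byte redzone below sp first */
	uint64_t x32_ptr  = round_down(sp - 128 - len, 16);
	/* ia32: no redzone, allocate directly below sp */
	uint64_t ia32_ptr = round_down(sp - len, 16);

	printf("x32  scratch area at 0x%llx\n", (unsigned long long)x32_ptr);
	printf("ia32 scratch area at 0x%llx\n", (unsigned long long)ia32_ptr);
	return 0;
}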
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/