From: Tim Chen
To: Thomas Gleixner, Andy Lutomirski, Linus Torvalds, Greg KH
Cc: Tim Chen, Dave Hansen, Andrea Arcangeli, Andi Kleen, Arjan Van De Ven,
    linux-kernel@vger.kernel.org
Subject: [PATCH 3/7] x86/enter: Use IBRS on syscall and interrupts
Date: Thu, 4 Jan 2018 09:56:44 -0800
Message-Id: <0c525c4c6c817e9c42c7ed583d86dc591a86efde.1515086770.git.tim.c.chen@linux.intel.com>
In-Reply-To:
References:

Set IBRS on kernel entry via syscall and interrupt, and clear it on exit
back to userspace.

If an NMI runs while we are exiting the kernel, between IBRS_DISABLE and
SWAPGS, the NMI turns IBRS bit 0 back on and leaves it enabled when the
NMI exits.  IBRS bit 0 would then stay enabled in userland until the next
kernel entry.  That is only a minor inefficiency, and we eliminate it by
saving the MSR when entering the NMI in save_paranoid and restoring it
when exiting the NMI.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Tim Chen
---
 arch/x86/entry/entry_64.S        | 24 ++++++++++++++++++++++++
 arch/x86/entry/entry_64_compat.S |  9 +++++++++
 2 files changed, 33 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f72f5c..0c4d542 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include

 #include "calling.h"
@@ -170,6 +171,8 @@ ENTRY(entry_SYSCALL_64_trampoline)

 	/* Load the top of the task stack into RSP */
 	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS

 	/* Start building the simulated IRET frame. */
 	pushq	$__USER_DS			/* pt_regs->ss */
@@ -213,6 +216,8 @@ ENTRY(entry_SYSCALL_64)
 	 * is not required to switch CR3.
 	 */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS

 	TRACE_IRQS_OFF

@@ -407,6 +412,7 @@ syscall_return_via_sysret:
	 * We are on the trampoline stack.  All regs except RDI are live.
	 * We can do future final exit work right here.
	 */
+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

 	popq	%rdi
@@ -745,6 +751,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
	 * We can do future final exit work right here.
	 */

+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

 	/* Restore RDI. */
@@ -832,6 +839,14 @@ native_irq_return_ldt:
 	SWAPGS					/* to kernel GS */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */

+	/*
+	 * Normally we enable IBRS when we switch to kernel's CR3.
+	 * But we are going to switch back to user CR3 immediately
+	 * in this routine after fixing ESPFIX stack.  There is
+	 * no vulnerable code branching for IBRS to protect.
+	 * We don't toggle IBRS to avoid the cost of two MSR writes.
+	 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -965,6 +980,8 @@ ENTRY(switch_to_thread_stack)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI

 	pushq	7*8(%rdi)		/* regs->ss */
@@ -1265,6 +1282,7 @@ ENTRY(paranoid_entry)

 1:
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+	ENABLE_IBRS_SAVE_AND_CLOBBER save_reg=%r13d

 	ret
 END(paranoid_entry)
@@ -1288,6 +1306,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
@@ -1318,6 +1337,7 @@ ENTRY(error_entry)
 	SWAPGS
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER

 .Lerror_entry_from_usermode_after_swapgs:
 	/* Put us onto the real thread stack. */
@@ -1365,6 +1385,7 @@
	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER
 	jmp	.Lerror_entry_done

 .Lbstep_iret:
@@ -1379,6 +1400,7 @@
	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS

 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1480,6 +1502,7 @@ ENTRY(nmi)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
@@ -1730,6 +1753,7 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi

+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14

 	testl	%ebx, %ebx			/* swapgs needed? */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40f1700..88ee1c0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -54,6 +55,7 @@ ENTRY(entry_SYSENTER_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS

 	/*
 	 * User tracing code (ptrace or signal handlers) might assume that
@@ -224,6 +226,7 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
	 */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	ENABLE_IBRS_CLOBBER			/* clobbers %rax, %rcx, %rdx */

 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -240,6 +243,12 @@
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	/*
+	 * Clobber of %rax, %rcx, %rdx is OK before register restoring.
+	 * This is safe to do here because we have no indirect branches
+	 * between here and the return to userspace (sysretl).
+	 */
+	DISABLE_IBRS_CLOBBER
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
 	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
-- 
2.9.4
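
For context when reading the hunks above: the ENABLE_IBRS, DISABLE_IBRS,
the *_CLOBBER variants and the *_SAVE_AND_CLOBBER/RESTORE pair come from
the spec_ctrl.h header introduced earlier in this series and are not
defined in this patch.  What follows is only a rough, hypothetical sketch
of their core effect, assuming the architectural IA32_SPEC_CTRL MSR
(index 0x48) with IBRS as bit 0; the real macros are additionally gated
on whether IBRS is supported and in use, and handle the per-path register
save/clobber rules, which this sketch ignores.

	/*
	 * Illustrative sketch only -- not the series' actual macros.
	 * IA32_SPEC_CTRL is MSR 0x48; IBRS is bit 0.  WRMSR takes the
	 * MSR index in %ecx and the value in %edx:%eax.
	 */
	.macro ENABLE_IBRS_SKETCH		/* preserves caller's regs */
		pushq	%rax
		pushq	%rcx
		pushq	%rdx
		movl	$0x00000048, %ecx	/* IA32_SPEC_CTRL */
		movl	$1, %eax		/* set IBRS (bit 0) */
		xorl	%edx, %edx
		wrmsr
		popq	%rdx
		popq	%rcx
		popq	%rax
	.endm

	.macro DISABLE_IBRS_CLOBBER_SKETCH	/* clobbers %rax, %rcx, %rdx */
		movl	$0x00000048, %ecx	/* IA32_SPEC_CTRL */
		xorl	%eax, %eax		/* clear IBRS */
		xorl	%edx, %edx
		wrmsr
	.endm

The non-clobbering form corresponds to what the entry points above use
once the kernel stack is usable, the _CLOBBER forms skip the push/pop
overhead on paths where %rax/%rcx/%rdx are already dead, and the
_SAVE_AND_CLOBBER/RESTORE pair preserves the previous MSR value across
NMIs as described in the changelog.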