Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756964AbcCUQRO (ORCPT ); Mon, 21 Mar 2016 12:17:14 -0400 Received: from mga02.intel.com ([134.134.136.20]:60537 "EHLO mga02.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756751AbcCUQQR (ORCPT ); Mon, 21 Mar 2016 12:16:17 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.24,372,1455004800"; d="scan'208";a="928698045" From: Andi Kleen To: x86@kernel.org Cc: luto@amacapital.net, linux-kernel@vger.kernel.org, Andi Kleen Subject: [PATCH 6/9] x86: Use rd/wr fs/gs base in arch_prctl Date: Mon, 21 Mar 2016 09:16:06 -0700 Message-Id: <1458576969-13309-7-git-send-email-andi@firstfloor.org> X-Mailer: git-send-email 2.5.5 In-Reply-To: <1458576969-13309-1-git-send-email-andi@firstfloor.org> References: <1458576969-13309-1-git-send-email-andi@firstfloor.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4019 Lines: 133 From: Andi Kleen Convert arch_prctl to use the new instructions to change fs/gs if available, instead of using MSRs. This is merely a small performance optimization, no new functionality. With the new instructions the syscall is really obsolete, as everything can be set directly in ring 3. But the syscall is widely used by existing software, so we still support it. The syscall still enforces that the addresses are not in kernel space, even though that is no longer needed. This is mainly so that programs written for new CPUs do not suddenly fail on old CPUs. v2: Make kprobes safe v3: Rename things. 
Signed-off-by: Andi Kleen --- arch/x86/kernel/process_64.c | 48 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 53fa839..5f40517 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -530,20 +530,38 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +static noinline __kprobes void reload_user_gs(unsigned long addr) +{ + local_irq_disable(); + swapgs(); + loadsegment(gs, 0); + wrgsbase(addr); + swapgs(); + local_irq_enable(); +} + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; int doit = task == current; int cpu; + int fast_seg = boot_cpu_has(X86_FEATURE_FSGSBASE); switch (code) { case ARCH_SET_GS: + /* + * With fast_seg we don't need that check anymore, + * but keep it so that programs do not suddenly + * start failing when run on older CPUs. + * If you really want to set a address in kernel space + * use WRGSBASE directly. + */ if (addr >= TASK_SIZE_OF(task)) return -EPERM; cpu = get_cpu(); /* handle small bases via the GDT because that's faster to switch. */ - if (addr <= 0xffffffff) { + if (addr <= 0xffffffff && !fast_seg) { set_32bit_tls(task, GS_TLS, addr); if (doit) { load_TLS(&task->thread, cpu); @@ -555,8 +573,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + if (fast_seg) { + reload_user_gs(addr); + } else { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); + } } } put_cpu(); @@ -569,7 +591,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) cpu = get_cpu(); /* handle small bases via the GDT because that's faster to switch. 
*/ - if (addr <= 0xffffffff) { + if (addr <= 0xffffffff && !fast_seg) { set_32bit_tls(task, FS_TLS, addr); if (doit) { load_TLS(&task->thread, cpu); @@ -584,7 +606,10 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); + if (fast_seg) + wrfsbase(addr); + else + ret = wrmsrl_safe(MSR_FS_BASE, addr); } } put_cpu(); @@ -593,6 +618,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) unsigned long base; if (task->thread.fsindex == FS_TLS_SEL) base = read_32bit_tls(task, FS_TLS); + else if (doit && fast_seg) + base = rdfsbase(); else if (doit) rdmsrl(MSR_FS_BASE, base); else @@ -607,9 +634,14 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) base = read_32bit_tls(task, GS_TLS); else if (doit) { savesegment(gs, gsindex); - if (gsindex) - rdmsrl(MSR_KERNEL_GS_BASE, base); - else + if (gsindex) { + if (fast_seg) { + local_irq_disable(); + base = read_user_gsbase(); + local_irq_enable(); + } else + rdmsrl(MSR_KERNEL_GS_BASE, base); + } else base = task->thread.gs; } else base = task->thread.gs; -- 2.5.5