Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753030AbeADUQi (ORCPT + 1 other); Thu, 4 Jan 2018 15:16:38 -0500 Received: from mail.linuxfoundation.org ([140.211.169.12]:57738 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751829AbeADUQg (ORCPT ); Thu, 4 Jan 2018 15:16:36 -0500 Date: Thu, 4 Jan 2018 21:16:40 +0100 From: Greg KH To: Tim Chen Cc: Thomas Gleixner , Andy Lutomirski , Linus Torvalds , Dave Hansen , Andrea Arcangeli , Andi Kleen , Arjan Van De Ven , linux-kernel@vger.kernel.org Subject: Re: [PATCH 6/7] x86/spec_ctrl: Add sysctl knobs to enable/disable SPEC_CTRL feature Message-ID: <20180104201640.GD30228@kroah.com> References: <4d4b3752e8e533201c6983d8473eea95c747ea33.1515086770.git.tim.c.chen@linux.intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <4d4b3752e8e533201c6983d8473eea95c747ea33.1515086770.git.tim.c.chen@linux.intel.com> User-Agent: Mutt/1.9.2 (2017-12-15) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: On Thu, Jan 04, 2018 at 09:56:47AM -0800, Tim Chen wrote: > There are 2 ways to control IBRS > > 1. At boot time > noibrs kernel boot parameter will disable IBRS usage > > Otherwise if the above parameters are not specified, the system > will enable ibrs and ibpb usage if the cpu supports it. > > 2. At run time > echo 0 > /sys/kernel/debug/ibrs_enabled will turn off IBRS > echo 1 > /sys/kernel/debug/ibrs_enabled will turn on IBRS in kernel > echo 2 > /sys/kernel/debug/ibrs_enabled will turn on IBRS in both userspace and kernel > > The implementation was updated with input from Andrea Arcangeli. 
> > Signed-off-by: Tim Chen > --- > Documentation/admin-guide/kernel-parameters.txt | 4 + > arch/x86/include/asm/spec_ctrl.h | 163 +++++++++++++++++++----- > arch/x86/kernel/cpu/Makefile | 1 + > arch/x86/kernel/cpu/scattered.c | 10 ++ > arch/x86/kernel/cpu/spec_ctrl.c | 124 ++++++++++++++++++ > 5 files changed, 270 insertions(+), 32 deletions(-) > create mode 100644 arch/x86/kernel/cpu/spec_ctrl.c > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 5dfd262..d64f49f 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -2573,6 +2573,10 @@ > noexec=on: enable non-executable mappings (default) > noexec=off: disable non-executable mappings > > + noibrs [X86] > + Don't use indirect branch restricted speculation (IBRS) > + feature. > + > nosmap [X86] > Disable SMAP (Supervisor Mode Access Prevention) > even if it is supported by processor. > diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h > index 23b2804..2c35571 100644 > --- a/arch/x86/include/asm/spec_ctrl.h > +++ b/arch/x86/include/asm/spec_ctrl.h > @@ -1,13 +1,17 @@ > #ifndef _ASM_X86_SPEC_CTRL_H > #define _ASM_X86_SPEC_CTRL_H > > -#include > #include > #include > -#include > + > +#define SPEC_CTRL_IBRS_INUSE (1<<0) /* OS enables IBRS usage */ > +#define SPEC_CTRL_IBRS_SUPPORTED (1<<1) /* System supports IBRS */ > +#define SPEC_CTRL_IBRS_ADMIN_DISABLED (1<<2) /* Admin disables IBRS */ > > #ifdef __ASSEMBLY__ > > +.extern spec_ctrl_ibrs > + > .macro PUSH_MSR_REGS > pushq %rax > pushq %rcx > @@ -27,35 +31,63 @@ > .endm > > .macro ENABLE_IBRS > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > PUSH_MSR_REGS > WRMSR_ASM $MSR_IA32_SPEC_CTRL, $SPEC_CTRL_FEATURE_ENABLE_IBRS > POP_MSR_REGS > -10: > + > + jmp .Ldone_\@ > +.Lskip_\@: > + /* > + * prevent speculation beyond here as we 
could want to > + * stop speculation by enabling IBRS > + */ > + lfence > +.Ldone_\@: > .endm > > .macro DISABLE_IBRS > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > PUSH_MSR_REGS > WRMSR_ASM $MSR_IA32_SPEC_CTRL, $SPEC_CTRL_FEATURE_DISABLE_IBRS > POP_MSR_REGS > -10: > + > +.Lskip_\@: > .endm > > .macro ENABLE_IBRS_CLOBBER > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > WRMSR_ASM $MSR_IA32_SPEC_CTRL, $SPEC_CTRL_FEATURE_ENABLE_IBRS > -10: > + > + jmp .Ldone_\@ > +.Lskip_\@: > + /* > + * prevent speculation beyond here as we could want to > + * stop speculation by enabling IBRS > + */ > + lfence > +.Ldone_\@: > .endm > > .macro DISABLE_IBRS_CLOBBER > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > WRMSR_ASM $MSR_IA32_SPEC_CTRL, $SPEC_CTRL_FEATURE_DISABLE_IBRS > -10: > + > +.Lskip_\@: > .endm > > .macro ENABLE_IBRS_SAVE_AND_CLOBBER save_reg:req > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > movl $MSR_IA32_SPEC_CTRL, %ecx > rdmsr > movl %eax, \save_reg > @@ -63,22 +95,103 @@ > movl $0, %edx > movl $SPEC_CTRL_FEATURE_ENABLE_IBRS, %eax > wrmsr > -10: > + > + jmp .Ldone_\@ > +.Lskip_\@: > + /* > + * prevent speculation beyond here as we could want to > + * stop speculation by enabling IBRS > + */ > + lfence > +.Ldone_\@: > .endm > > .macro RESTORE_IBRS_CLOBBER save_reg:req > - ALTERNATIVE "jmp 10f", "", X86_FEATURE_SPEC_CTRL > + testl $SPEC_CTRL_IBRS_INUSE, spec_ctrl_ibrs > + jz .Lskip_\@ > + > /* Set IBRS to the value saved in the save_reg */ > movl $MSR_IA32_SPEC_CTRL, %ecx > movl $0, %edx > movl \save_reg, %eax > wrmsr > -10: > + > + jmp .Ldone_\@ > +.Lskip_\@: > + /* > + * prevent speculation beyond here as we could want to > + * stop speculation by enabling IBRS > + */ > 
+ lfence > +.Ldone_\@: > .endm > > #else > #include > > +extern int spec_ctrl_ibrs; > +extern struct mutex spec_ctrl_mutex; > +extern unsigned int ibrs_enabled; > + > +enum { > + IBRS_DISABLED, > + /* in host kernel, disabled in guest and userland */ > + IBRS_ENABLED, > + /* in host kernel and host userland, disabled in guest */ > + IBRS_ENABLED_USER, > + IBRS_MAX = IBRS_ENABLED_USER, Yup, not a bit field, your documentation is wrong :( > +}; > + > + > +static inline void set_ibrs_inuse(void) > +{ > + if (spec_ctrl_ibrs & SPEC_CTRL_IBRS_SUPPORTED) > + spec_ctrl_ibrs |= SPEC_CTRL_IBRS_INUSE; > +} > + > +static inline void clear_ibrs_inuse(void) > +{ > + spec_ctrl_ibrs &= ~SPEC_CTRL_IBRS_INUSE; > +} > + > +static inline int ibrs_inuse(void) > +{ > + if (spec_ctrl_ibrs & SPEC_CTRL_IBRS_INUSE) > + return 1; > + else > + /* > + * prevent speculation beyond here as we could want to > + * stop speculation by enabling IBRS with this check > + */ > + rmb(); > + return 0; > +} > + > +static inline void set_ibrs_supported(void) > +{ > + spec_ctrl_ibrs |= SPEC_CTRL_IBRS_SUPPORTED; > + if (!(spec_ctrl_ibrs & SPEC_CTRL_IBRS_ADMIN_DISABLED)) > + set_ibrs_inuse(); > + else > + /* > + * prevent speculation beyond here as we could want to > + * stop speculation by enabling IBRS > + */ > + rmb(); > +} > + > +static inline void set_ibrs_disabled(void) > +{ > + spec_ctrl_ibrs |= SPEC_CTRL_IBRS_ADMIN_DISABLED; > + if (ibrs_inuse()) > + clear_ibrs_inuse(); > +} > + > +static inline void clear_ibrs_disabled(void) > +{ > + spec_ctrl_ibrs &= ~SPEC_CTRL_IBRS_ADMIN_DISABLED; > +} > + > static inline void __disable_indirect_speculation(void) > { > native_wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_ENABLE_IBRS); > @@ -96,21 +209,14 @@ static inline void __enable_indirect_speculation(void) > static inline void unprotected_speculation_begin(void) > { > WARN_ON_ONCE(!irqs_disabled()); > - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) > + if (ibrs_inuse()) > __enable_indirect_speculation(); > } > > 
static inline void unprotected_speculation_end(void) > { > - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) > + if (ibrs_inuse()) > __disable_indirect_speculation(); > - else > - /* > - * If we intended to disable indirect speculation > - * but come here due to mis-speculation, we need > - * to stop the mis-speculation with rmb. > - */ > - rmb(); > } > > > @@ -121,20 +227,13 @@ static inline void unprotected_speculation_end(void) > */ > static inline void unprotected_firmware_begin(void) > { > - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) > + if (ibrs_inuse()) > __disable_indirect_speculation(); > - else > - /* > - * If we intended to disable indirect speculation > - * but come here due to mis-speculation, we need > - * to stop the mis-speculation with rmb. > - */ > - rmb(); > } > > static inline void unprotected_firmware_end(void) > { > - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) > + if (ibrs_inuse()) > __enable_indirect_speculation(); > } > > diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile > index 90cb82d..a25f1ab 100644 > --- a/arch/x86/kernel/cpu/Makefile > +++ b/arch/x86/kernel/cpu/Makefile > @@ -24,6 +24,7 @@ obj-y += match.o > obj-y += bugs.o > obj-$(CONFIG_CPU_FREQ) += aperfmperf.o > obj-y += cpuid-deps.o > +obj-y += spec_ctrl.o > > obj-$(CONFIG_PROC_FS) += proc.o > obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o > diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c > index bc50c40..2d23a2fe 100644 > --- a/arch/x86/kernel/cpu/scattered.c > +++ b/arch/x86/kernel/cpu/scattered.c > @@ -8,6 +8,7 @@ > #include > > #include > +#include > > struct cpuid_bit { > u16 feature; > @@ -56,6 +57,15 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) > > if (regs[cb->reg] & (1 << cb->bit)) > set_cpu_cap(c, cb->feature); > + > + } > + if (!c->cpu_index) { > + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { > + printk(KERN_INFO "FEATURE SPEC_CTRL Present\n"); > + set_ibrs_supported(); > + if (ibrs_inuse()) > + 
ibrs_enabled = IBRS_ENABLED; > + } > } > } > > diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c > new file mode 100644 > index 0000000..6946678 > --- /dev/null > +++ b/arch/x86/kernel/cpu/spec_ctrl.c > @@ -0,0 +1,124 @@ > +#include No copyright or SPDX line? At least it doesn't have the old horrid Intel header boilerplate, so I should be thankful of that. But it isn't ok like this either, sorry. > +#include > +#include > +#include > +#include > + > +#include > +#include > + > +/* > + * spec_ctrl_ibrs > + * bit 0 = indicate if ibrs is currently in use > + * bit 1 = indicate if system supports ibrs > + * bit 2 = indicate if admin disables ibrs Why bits and not integer values? Can you mix them? > + */ > + > +int spec_ctrl_ibrs; > +EXPORT_SYMBOL(spec_ctrl_ibrs); Why is this exported? What module will ever need this? And horrid global symbol name, it doesn't say what it is in an obvious way just by looking at it. > + > +/* mutex to serialize IBRS control changes */ > +DEFINE_MUTEX(spec_ctrl_mutex); > +EXPORT_SYMBOL(spec_ctrl_mutex); Why export this? You only ever use it in one place? Again, what module needs it? And I have to ask, EXPORT_SYMBOL_GPL() for this and spec_ctrl_ibrs as well? > +unsigned int ibrs_enabled __read_mostly; > +EXPORT_SYMBOL(ibrs_enabled); Again, what module needs this? And did __read_mostly really matter in performance tests? 
> + > +static int __init noibrs(char *str) > +{ > + set_ibrs_disabled(); > + > + return 0; > +} > +early_param("noibrs", noibrs); > + > +static ssize_t __enabled_read(struct file *file, char __user *user_buf, > + size_t count, loff_t *ppos, unsigned int *field) > +{ > + char buf[32]; > + unsigned int len; > + > + len = sprintf(buf, "%d\n", READ_ONCE(*field)); > + return simple_read_from_buffer(user_buf, count, ppos, buf, len); > +} > + > +static ssize_t ibrs_enabled_read(struct file *file, char __user *user_buf, > + size_t count, loff_t *ppos) > +{ > + return __enabled_read(file, user_buf, count, ppos, &ibrs_enabled); > +} It's a single variable, why such complex debugfs for that? This should be handled by a helper macro already, right? > +static void spec_ctrl_flush_all_cpus(u32 msr_nr, u64 val) > +{ > + int cpu; > + get_online_cpus(); > + for_each_online_cpu(cpu) > + wrmsrl_on_cpu(cpu, msr_nr, val); > + put_online_cpus(); > +} > + > +static ssize_t ibrs_enabled_write(struct file *file, > + const char __user *user_buf, > + size_t count, loff_t *ppos) > +{ > + char buf[32]; > + ssize_t len; > + unsigned int enable; > + > + len = min(count, sizeof(buf) - 1); > + if (copy_from_user(buf, user_buf, len)) > + return -EFAULT; > + > + buf[len] = '\0'; > + if (kstrtouint(buf, 0, &enable)) > + return -EINVAL; 3 value "flags" are horrid :( > + > + if (enable > IBRS_MAX) > + return -EINVAL; > + > + mutex_lock(&spec_ctrl_mutex); Always run checkpatch.pl so you don't get grumpy kernel maintainers telling you to run checkpatch.pl :( > + > + if (enable == IBRS_DISABLED) { > + /* disable IBRS usage */ > + set_ibrs_disabled(); > + if (spec_ctrl_ibrs & SPEC_CTRL_IBRS_SUPPORTED) > + spec_ctrl_flush_all_cpus(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_DISABLE_IBRS); > + } else if (enable == IBRS_ENABLED) { > + /* enable IBRS usage in kernel */ > + clear_ibrs_disabled(); > + if (spec_ctrl_ibrs & SPEC_CTRL_IBRS_SUPPORTED) > + set_ibrs_inuse(); > + else > + /* Platform don't support IBRS */ > + 
enable = IBRS_DISABLED; > + } else if (enable == IBRS_ENABLED_USER) { > + /* enable IBRS usage in both userspace and kernel */ > + clear_ibrs_disabled(); > + /* don't change IBRS value once we set it to always on */ > + clear_ibrs_inuse(); > + if (spec_ctrl_ibrs & SPEC_CTRL_IBRS_SUPPORTED) > + spec_ctrl_flush_all_cpus(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_ENABLE_IBRS); > + else > + /* Platform don't support IBRS */ > + enable = IBRS_DISABLED; > + } > + > + WRITE_ONCE(ibrs_enabled, enable); It's a debugfs write callback, why do you care about WRITE_ONCE()? thanks, greg k-h