On vmx_vcpu_load_vmcs and svm_vcpu_load, respect user controlled
configuration for conditional IBPB and only attempt IBPB MSR when
switching between different guest vCPUs IFF switch_mm_always_ibpb,
which fixes a situation where the kernel will issue IBPB
unconditionally even when conditional IBPB is enabled.
If a user has spectre_v2_user mitigation enabled, in any
configuration, and the underlying processor supports X86_FEATURE_IBPB,
X86_FEATURE_USE_IBPB is set and any calls to
indirect_branch_prediction_barrier() will issue IBPB MSR.
Depending on the spectre_v2_user configuration, either
switch_mm_always_ibpb key or switch_mm_cond_ibpb key will be set.
Both switch_mm_always_ibpb and switch_mm_cond_ibpb are handled by
switch_mm() -> cond_mitigation(), which works well in cases where
switching vCPUs (i.e. switching tasks) also switches mm_struct;
however, this misses a paranoid case where user space may be running
multiple guests in a single process (i.e. single mm_struct).
This paranoid case is already covered by vmx_vcpu_load_vmcs and
svm_vcpu_load; however, this is done by calling
indirect_branch_prediction_barrier() and thus the kernel
unconditionally issues IBPB if X86_FEATURE_USE_IBPB is set.
Fix by using intermediary call to x86_virt_guest_switch_ibpb(), which
gates IBPB MSR IFF switch_mm_always_ibpb is true. This is useful for
security paranoid VMMs in either single process or multi-process VMM
configurations.
switch_mm_always_ibpb key is user controlled via spectre_v2_user and
will be true for the following configurations:
spectre_v2_user=on
spectre_v2_user=prctl,ibpb
spectre_v2_user=seccomp,ibpb
Signed-off-by: Jon Kohler <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Waiman Long <[email protected]>
---
v1 -> v2:
- Addressed comments on approach from Sean.
arch/x86/include/asm/spec-ctrl.h | 15 +++++++++++++++
arch/x86/kernel/cpu/bugs.c | 6 +++++-
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 2 +-
4 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 5393babc0598..1ad140b17ad7 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -85,4 +85,19 @@ static inline void speculative_store_bypass_ht_init(void) { }
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
+/*
+ * Issue IBPB when switching guest vCPUs IFF if switch_mm_always_ibpb.
+ * Primarily useful for security paranoid (or naive) user space VMMs
+ * that may run multiple VMs within a single process.
+ * For multi-process VMMs, switching vCPUs, i.e. switching tasks,
+ * will also switch mm_structs and thus do IPBP via cond_mitigation();
+ * however, in the always_ibpb case, take a paranoid approach and issue
+ * IBPB on both switch_mm() and vCPU switch.
+ */
+static inline void x86_virt_guest_switch_ibpb(void)
+{
+ if (static_branch_unlikely(&switch_mm_always_ibpb))
+ indirect_branch_prediction_barrier();
+}
+
#endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6296e1ebed1d..6aafb0279cbc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -68,8 +68,12 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
-/* Control unconditional IBPB in switch_mm() */
+/* Control unconditional IBPB in both switch_mm() and
+ * x86_virt_guest_switch_ibpb().
+ * See notes on x86_virt_guest_switch_ibpb() for KVM use case details.
+ */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+EXPORT_SYMBOL_GPL(switch_mm_always_ibpb);
/* Control MDS CPU buffer clear before returning to user space */
DEFINE_STATIC_KEY_FALSE(mds_user_clear);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bd4c64b362d2..fc08c94df888 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1302,7 +1302,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
- indirect_branch_prediction_barrier();
+ x86_virt_guest_switch_ibpb();
}
if (kvm_vcpu_apicv_active(vcpu))
__avic_vcpu_load(vcpu, cpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 04d170c4b61e..a8eed9b8221b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1270,7 +1270,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
* The L1 VMM can protect itself with retpolines, IBPB or IBRS.
*/
if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
- indirect_branch_prediction_barrier();
+ x86_virt_guest_switch_ibpb();
}
if (!already_loaded) {
--
2.30.1 (Apple Git-130)
On Mon, Apr 18, 2022, Jon Kohler wrote:
> On vmx_vcpu_load_vmcs and svm_vcpu_load, respect user controlled
> configuration for conditional IBPB and only attempt IBPB MSR when
> switching between different guest vCPUs IFF switch_mm_always_ibpb,
> which fixes a situation where the kernel will issue IBPB
> unconditionally even when conditional IBPB is enabled.
>
> If a user has spectre_v2_user mitigation enabled, in any
> configuration, and the underlying processor supports X86_FEATURE_IBPB,
> X86_FEATURE_USE_IBPB is set and any calls to
> indirect_branch_prediction_barrier() will issue IBPB MSR.
>
> Depending on the spectre_v2_user configuration, either
> switch_mm_always_ibpb key or switch_mm_cond_ibpb key will be set.
>
> Both switch_mm_always_ibpb and switch_mm_cond_ibpb are handled by
> switch_mm() -> cond_mitigation(), which works well in cases where
> switching vCPUs (i.e. switching tasks) also switches mm_struct;
> however, this misses a paranoid case where user space may be running
> multiple guests in a single process (i.e. single mm_struct).
>
> This paranoid case is already covered by vmx_vcpu_load_vmcs and
> svm_vcpu_load; however, this is done by calling
> indirect_branch_prediction_barrier() and thus the kernel
> unconditionally issues IBPB if X86_FEATURE_USE_IBPB is set.
The changelog should call out that switch_mm_cond_ibpb is intentionally "ignored"
for the virt case, and explain why it's nonsensical to emit IBPB in that scenario.
> Fix by using intermediary call to x86_virt_guest_switch_ibpb(), which
> gates IBPB MSR IFF switch_mm_always_ibpb is true. This is useful for
> security paranoid VMMs in either single process or multi-process VMM
> configurations.
Multi-process VMM? KVM doesn't allow "sharing" a VM across processes. Userspace
can share guest memory across processes, but that's not relevant to an IBPB on
guest switch. I suspect you're loosely referring to all of userspace as a single
VMM. That's inaccurate, or at least unnecessarily confusing, from a kernel
perspective. I am not aware of a VMM that runs as a monolithic "daemon" and forks
a new process for every VM. And even in such a case, I would argue that most
people would refer to each process as a separate VMM.
If there's a blurb about the switch_mm_cond_ibpb case being nonsensical, there's
probably a good segue into stating the new behavior.
> switch_mm_always_ibpb key is user controlled via spectre_v2_user and
> will be true for the following configurations:
> spectre_v2_user=on
> spectre_v2_user=prctl,ibpb
> spectre_v2_user=seccomp,ibpb
>
> Signed-off-by: Jon Kohler <[email protected]>
> Cc: Sean Christopherson <[email protected]>
> Cc: Andrea Arcangeli <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Josh Poimboeuf <[email protected]>
> Cc: Waiman Long <[email protected]>
> ---
> v1 -> v2:
> - Addressed comments on approach from Sean.
>
> arch/x86/include/asm/spec-ctrl.h | 15 +++++++++++++++
> arch/x86/kernel/cpu/bugs.c | 6 +++++-
> arch/x86/kvm/svm/svm.c | 2 +-
> arch/x86/kvm/vmx/vmx.c | 2 +-
> 4 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
> index 5393babc0598..1ad140b17ad7 100644
> --- a/arch/x86/include/asm/spec-ctrl.h
> +++ b/arch/x86/include/asm/spec-ctrl.h
> @@ -85,4 +85,19 @@ static inline void speculative_store_bypass_ht_init(void) { }
> extern void speculation_ctrl_update(unsigned long tif);
> extern void speculation_ctrl_update_current(void);
>
> +/*
> + * Issue IBPB when switching guest vCPUs IFF if switch_mm_always_ibpb.
Extra "if" there.
> + * Primarily useful for security paranoid (or naive) user space VMMs
> + * that may run multiple VMs within a single process.
> + * For multi-process VMMs, switching vCPUs, i.e. switching tasks,
As above, "multi-process VMMs" is very confusing, they're really just separate VMMs.
Something like this?
* For the more common case of running VMs in their own dedicated process,
* switching vCPUs that belong to different VMs, i.e. switching tasks, will also
* ...
> + * will also switch mm_structs and thus do IPBP via cond_mitigation();
> + * however, in the always_ibpb case, take a paranoid approach and issue
> + * IBPB on both switch_mm() and vCPU switch.
> + */
> +static inline void x86_virt_guest_switch_ibpb(void)
> +{
> + if (static_branch_unlikely(&switch_mm_always_ibpb))
> + indirect_branch_prediction_barrier();
> +}
> +
> #endif
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 6296e1ebed1d..6aafb0279cbc 100644
> On Apr 21, 2022, at 11:20 AM, Sean Christopherson <[email protected]> wrote:
>
> On Mon, Apr 18, 2022, Jon Kohler wrote:
>> On vmx_vcpu_load_vmcs and svm_vcpu_load, respect user controlled
>> configuration for conditional IBPB and only attempt IBPB MSR when
>> switching between different guest vCPUs IFF switch_mm_always_ibpb,
>> which fixes a situation where the kernel will issue IBPB
>> unconditionally even when conditional IBPB is enabled.
>>
>> If a user has spectre_v2_user mitigation enabled, in any
>> configuration, and the underlying processor supports X86_FEATURE_IBPB,
>> X86_FEATURE_USE_IBPB is set and any calls to
>> indirect_branch_prediction_barrier() will issue IBPB MSR.
>>
>> Depending on the spectre_v2_user configuration, either
>> switch_mm_always_ibpb key or switch_mm_cond_ibpb key will be set.
>>
>> Both switch_mm_always_ibpb and switch_mm_cond_ibpb are handled by
>> switch_mm() -> cond_mitigation(), which works well in cases where
>> switching vCPUs (i.e. switching tasks) also switches mm_struct;
>> however, this misses a paranoid case where user space may be running
>> multiple guests in a single process (i.e. single mm_struct).
>>
>> This paranoid case is already covered by vmx_vcpu_load_vmcs and
>> svm_vcpu_load; however, this is done by calling
>> indirect_branch_prediction_barrier() and thus the kernel
>> unconditionally issues IBPB if X86_FEATURE_USE_IBPB is set.
>
> The changelog should call out that switch_mm_cond_ibpb is intentionally "ignored"
> for the virt case, and explain why it's nonsensical to emit IBPB in that scenario.
Ok will do, thanks
>
>> Fix by using intermediary call to x86_virt_guest_switch_ibpb(), which
>> gates IBPB MSR IFF switch_mm_always_ibpb is true. This is useful for
>> security paranoid VMMs in either single process or multi-process VMM
>> configurations.
>
> Multi-process VMM? KVM doesn't allow "sharing" a VM across processes. Userspace
> can share guest memory across processes, but that's not relevant to an IBPB on
> guest switch. I suspect you're loosely referring to all of userspace as a single
> VMM. That's inaccurate, or at least unnecessarily confusing, from a kernel
> perspective. I am not aware of a VMM that runs as a monolithic "daemon" and forks
> a new process for every VM. And even in such a case, I would argue that most
> people would refer to each process as a separate VMM.
>
> If there's a blurb about the switch_mm_cond_ibpb case being nonsensical, there's
> probably a good segue into stating the new behavior.
Yea, thats what I was getting at but failed to wordsmith it nicely. I’ll sharpen it up
and integrate your feedback into a v3
>
>> switch_mm_always_ibpb key is user controlled via spectre_v2_user and
>> will be true for the following configurations:
>> spectre_v2_user=on
>> spectre_v2_user=prctl,ibpb
>> spectre_v2_user=seccomp,ibpb
>>
>> Signed-off-by: Jon Kohler <[email protected]>
>> Cc: Sean Christopherson <[email protected]>
>> Cc: Andrea Arcangeli <[email protected]>
>> Cc: Kees Cook <[email protected]>
>> Cc: Josh Poimboeuf <[email protected]>
>> Cc: Waiman Long <[email protected]>
>> ---
>> v1 -> v2:
>> - Addressed comments on approach from Sean.
>>
>> arch/x86/include/asm/spec-ctrl.h | 15 +++++++++++++++
>> arch/x86/kernel/cpu/bugs.c | 6 +++++-
>> arch/x86/kvm/svm/svm.c | 2 +-
>> arch/x86/kvm/vmx/vmx.c | 2 +-
>> 4 files changed, 22 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
>> index 5393babc0598..1ad140b17ad7 100644
>> --- a/arch/x86/include/asm/spec-ctrl.h
>> +++ b/arch/x86/include/asm/spec-ctrl.h
>> @@ -85,4 +85,19 @@ static inline void speculative_store_bypass_ht_init(void) { }
>> extern void speculation_ctrl_update(unsigned long tif);
>> extern void speculation_ctrl_update_current(void);
>>
>> +/*
>> + * Issue IBPB when switching guest vCPUs IFF if switch_mm_always_ibpb.
>
> Extra "if" there.
>
>> + * Primarily useful for security paranoid (or naive) user space VMMs
>> + * that may run multiple VMs within a single process.
>> + * For multi-process VMMs, switching vCPUs, i.e. switching tasks,
>
> As above, "multi-process VMMs" is very confusing, they're really just separate VMMs.
> Something like this?
>
> * For the more common case of running VMs in their own dedicated process,
> * switching vCPUs that belong to different VMs, i.e. switching tasks, will also
> * ...
>
>> + * will also switch mm_structs and thus do IPBP via cond_mitigation();
>> + * however, in the always_ibpb case, take a paranoid approach and issue
>> + * IBPB on both switch_mm() and vCPU switch.
>> + */
>> +static inline void x86_virt_guest_switch_ibpb(void)
>> +{
>> + if (static_branch_unlikely(&switch_mm_always_ibpb))
>> + indirect_branch_prediction_barrier();
>> +}
>> +
>> #endif
>> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
>> index 6296e1ebed1d..6aafb0279cbc 100644