2020-04-02 13:46:27

by Thomas Gleixner

[permalink] [raw]
Subject: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

Without at least minimal handling for split lock detection induced #AC, VMX
will just run into the same problem as the VMWare hypervisor, which was
reported by Kenneth.

It will inject the #AC blindly into the guest whether the guest is prepared
or not.

Add the minimal required handling for it:

- Check guest state whether CR0.AM is enabled and EFLAGS.AC is set. If
so, then the #AC originated from CPL3 and the guest has is prepared to
handle it. In this case it does not matter whether the #AC is due to a
split lock or a regular unaligned check.

- Invoke a minimal split lock detection handler. If the host SLD mode is
sld_warn, then handle it in the same way as user space handling works:
Emit a warning, disable SLD and mark the current task with TIF_SLD.
With that resume the guest without injecting #AC.

If the host mode is sld_fatal or sld_off, emit a warning and deliver
the exception to user space which can crash and burn itself.

Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
force SLD off.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: "Kenneth R. Crudup" <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Xiaoyao Li <[email protected]>
Cc: Nadav Amit <[email protected]>
Cc: Thomas Hellstrom <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Tony Luck <[email protected]>
---
arch/x86/include/asm/cpu.h | 1 +
arch/x86/kernel/cpu/intel.c | 28 +++++++++++++++++++++++-----
arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++++++++++++++++++++++---
3 files changed, 61 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int s
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+extern bool handle_guest_split_lock(unsigned long ip);
extern void split_lock_validate_module_text(struct module *me, void *text, void *text_end);
#else
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1102,13 +1102,10 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}

-bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+static void split_lock_warn(unsigned long ip)
{
- if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
- return false;
-
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, regs->ip);
+ current->comm, current->pid, ip);

/*
* Disable the split lock detection for this task so it can make
@@ -1117,6 +1114,27 @@ bool handle_user_split_lock(struct pt_re
*/
sld_update_msr(false);
set_tsk_thread_flag(current, TIF_SLD);
+}
+
+bool handle_guest_split_lock(unsigned long ip)
+{
+ if (sld_state == sld_warn) {
+ split_lock_warn(ip);
+ return true;
+ }
+
+ pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid,
+ sld_state == sld_fatal ? "fatal" : "bogus", ip);
+ return false;
+}
+EXPORT_SYMBOL_GPL(handle_guest_split_lock);
+
+bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
+ return false;
+ split_lock_warn(regs->ip);
return true;
}

--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -65,6 +65,7 @@

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+MODULE_INFO(sld_safe, "Y");

#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
@@ -4623,6 +4624,22 @@ static int handle_machine_check(struct k
return 1;
}

+static bool guest_handles_ac(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If guest has alignment checking enabled in CR0 and activated in
+ * eflags, then the #AC originated from CPL3 and the guest is able
+ * to handle it. It does not matter whether this is a regular or
+ * a split lock operation induced #AC.
+ */
+ if (vcpu->arch.cr0 & X86_CR0_AM &&
+ vmx_get_rflags(vcpu) & X86_EFLAGS_AC)
+ return true;
+
+ /* Add guest SLD handling checks here once it's supported */
+ return false;
+}
+
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4688,9 +4705,6 @@ static int handle_exception_nmi(struct k
return handle_rmode_exception(vcpu, ex_no, error_code);

switch (ex_no) {
- case AC_VECTOR:
- kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
- return 1;
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
@@ -4719,6 +4733,26 @@ static int handle_exception_nmi(struct k
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
+ case AC_VECTOR:
+ if (guest_handles_ac(vcpu)) {
+ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+ return 1;
+ }
+ /*
+ * Handle #AC caused by split lock detection. If the host
+ * mode is sld_warn, then it warns, marks current with
+ * TIF_SLD and disables split lock detection. So the guest
+ * can just continue.
+ *
+ * If the host mode is fatal, the handling code warned. Let
+ * qemu kill itself.
+ *
+ * If the host mode is off, then this #AC is bonkers and
+ * something is badly wrong. Let it fail as well.
+ */
+ if (handle_guest_split_lock(kvm_rip_read(vcpu)))
+ return 1;
+ /* fall through */
default:
kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
kvm_run->ex.exception = ex_no;


2020-04-02 15:32:58

by Sean Christopherson

[permalink] [raw]
Subject: Re: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

On Thu, Apr 02, 2020 at 02:33:00PM +0200, Thomas Gleixner wrote:
> Without at least minimal handling for split lock detection induced #AC, VMX
> will just run into the same problem as the VMWare hypervisor, which was
> reported by Kenneth.
>
> It will inject the #AC blindly into the guest whether the guest is prepared
> or not.
>
> Add the minimal required handling for it:
>
> - Check guest state whether CR0.AM is enabled and EFLAGS.AC is set. If
> so, then the #AC originated from CPL3 and the guest has is prepared to
> handle it. In this case it does not matter whether the #AC is due to a
> split lock or a regular unaligned check.
>
> - Invoke a minimal split lock detection handler. If the host SLD mode is
> sld_warn, then handle it in the same way as user space handling works:
> Emit a warning, disable SLD and mark the current task with TIF_SLD.
> With that resume the guest without injecting #AC.
>
> If the host mode is sld_fatal or sld_off, emit a warning and deliver
> the exception to user space which can crash and burn itself.
>
> Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
> force SLD off.

Some comments below. But, any objection to taking Xiaoyao's patches that
do effectively the same things, minus the MOD_INFO()? I'll repost them in
reply to this thread.

> Signed-off-by: Thomas Gleixner <[email protected]>
> Cc: "Kenneth R. Crudup" <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Fenghua Yu <[email protected]>
> Cc: Xiaoyao Li <[email protected]>
> Cc: Nadav Amit <[email protected]>
> Cc: Thomas Hellstrom <[email protected]>
> Cc: Sean Christopherson <[email protected]>
> Cc: Tony Luck <[email protected]>
> ---
> arch/x86/include/asm/cpu.h | 1 +
> arch/x86/kernel/cpu/intel.c | 28 +++++++++++++++++++++++-----
> arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++++++++++++++++++++++---
> 3 files changed, 61 insertions(+), 8 deletions(-)
>
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int s
> extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
> extern void switch_to_sld(unsigned long tifn);
> extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
> +extern bool handle_guest_split_lock(unsigned long ip);
> extern void split_lock_validate_module_text(struct module *me, void *text, void *text_end);
> #else
> static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -1102,13 +1102,10 @@ static void split_lock_init(void)
> split_lock_verify_msr(sld_state != sld_off);
> }
>
> -bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +static void split_lock_warn(unsigned long ip)
> {
> - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> - return false;
> -
> pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
> - current->comm, current->pid, regs->ip);
> + current->comm, current->pid, ip);
>
> /*
> * Disable the split lock detection for this task so it can make
> @@ -1117,6 +1114,27 @@ bool handle_user_split_lock(struct pt_re
> */
> sld_update_msr(false);
> set_tsk_thread_flag(current, TIF_SLD);
> +}
> +
> +bool handle_guest_split_lock(unsigned long ip)
> +{
> + if (sld_state == sld_warn) {
> + split_lock_warn(ip);
> + return true;
> + }
> +
> + pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
> + current->comm, current->pid,
> + sld_state == sld_fatal ? "fatal" : "bogus", ip);
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(handle_guest_split_lock);
> +
> +bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +{
> + if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> + return false;
> + split_lock_warn(regs->ip);
> return true;
> }
>
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -65,6 +65,7 @@
>
> MODULE_AUTHOR("Qumranet");
> MODULE_LICENSE("GPL");
> +MODULE_INFO(sld_safe, "Y");
>
> #ifdef MODULE
> static const struct x86_cpu_id vmx_cpu_id[] = {
> @@ -4623,6 +4624,22 @@ static int handle_machine_check(struct k
> return 1;
> }
>
> +static bool guest_handles_ac(struct kvm_vcpu *vcpu)
> +{
> + /*
> + * If guest has alignment checking enabled in CR0 and activated in
> + * eflags, then the #AC originated from CPL3 and the guest is able
> + * to handle it. It does not matter whether this is a regular or
> + * a split lock operation induced #AC.
> + */
> + if (vcpu->arch.cr0 & X86_CR0_AM &&

Technically not required since KVM doesn't let the gets toggle CR0.AM at
will, but going through kvm_read_cr0{_bits}() is preferred.

> + vmx_get_rflags(vcpu) & X86_EFLAGS_AC)

I don't think this is correct. A guest could trigger a split-lock #AC at
CPL0 with EFLAGS.AC=1 and CR0.AM=1, and then panic because it didn't expect
#AC at CPL0.

> + return true;
> +
> + /* Add guest SLD handling checks here once it's supported */
> + return false;
> +}
> +
> static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4688,9 +4705,6 @@ static int handle_exception_nmi(struct k
> return handle_rmode_exception(vcpu, ex_no, error_code);
>
> switch (ex_no) {
> - case AC_VECTOR:
> - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> - return 1;
> case DB_VECTOR:
> dr6 = vmcs_readl(EXIT_QUALIFICATION);
> if (!(vcpu->guest_debug &
> @@ -4719,6 +4733,26 @@ static int handle_exception_nmi(struct k
> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> kvm_run->debug.arch.exception = ex_no;
> break;
> + case AC_VECTOR:
> + if (guest_handles_ac(vcpu)) {
> + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> + return 1;
> + }
> + /*
> + * Handle #AC caused by split lock detection. If the host
> + * mode is sld_warn, then it warns, marks current with
> + * TIF_SLD and disables split lock detection. So the guest
> + * can just continue.
> + *
> + * If the host mode is fatal, the handling code warned. Let
> + * qemu kill itself.
> + *
> + * If the host mode is off, then this #AC is bonkers and
> + * something is badly wrong. Let it fail as well.
> + */
> + if (handle_guest_split_lock(kvm_rip_read(vcpu)))
> + return 1;
> + /* fall through */
> default:
> kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
> kvm_run->ex.exception = ex_no;
>

2020-04-02 15:58:36

by Sean Christopherson

[permalink] [raw]
Subject: [PATCH 0/3] x86: KVM: VMX: Add basic split-lock #AC handling

First three patches from Xiaoyao's series to add split-lock #AC support
in KVM.

Xiaoyao Li (3):
KVM: x86: Emulate split-lock access as a write in emulator
x86/split_lock: Refactor and export handle_user_split_lock() for KVM
KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in
guest

arch/x86/include/asm/cpu.h | 4 ++--
arch/x86/kernel/cpu/intel.c | 7 ++++---
arch/x86/kernel/traps.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++---
arch/x86/kvm/x86.c | 12 +++++++++++-
5 files changed, 45 insertions(+), 10 deletions(-)

--
2.24.1

2020-04-02 16:08:17

by Sean Christopherson

[permalink] [raw]
Subject: Re: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

On Thu, Apr 02, 2020 at 03:44:00PM +0000, Nadav Amit wrote:
> > On Apr 2, 2020, at 8:30 AM, Sean Christopherson <[email protected]> wrote:
> >
> > On Thu, Apr 02, 2020 at 02:33:00PM +0200, Thomas Gleixner wrote:
> >> Without at least minimal handling for split lock detection induced #AC, VMX
> >> will just run into the same problem as the VMWare hypervisor, which was
> >> reported by Kenneth.
> >>
> >> It will inject the #AC blindly into the guest whether the guest is prepared
> >> or not.
> >>
> >> Add the minimal required handling for it:
> >>
> >> - Check guest state whether CR0.AM is enabled and EFLAGS.AC is set. If
> >> so, then the #AC originated from CPL3 and the guest has is prepared to
> >> handle it. In this case it does not matter whether the #AC is due to a
> >> split lock or a regular unaligned check.
> >>
> >> - Invoke a minimal split lock detection handler. If the host SLD mode is
> >> sld_warn, then handle it in the same way as user space handling works:
> >> Emit a warning, disable SLD and mark the current task with TIF_SLD.
> >> With that resume the guest without injecting #AC.
> >>
> >> If the host mode is sld_fatal or sld_off, emit a warning and deliver
> >> the exception to user space which can crash and burn itself.
> >>
> >> Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
> >> force SLD off.
> >
> > Some comments below. But, any objection to taking Xiaoyao's patches that
> > do effectively the same things, minus the MOD_INFO()? I'll repost them in
> > reply to this thread.
>
> IIUC they also deal with emulated split-lock accesses in the host, during
> instruction emulation [1]. This seems also to be required, although I am not
> sure the approach that he took once emulation encounters a split-lock is
> robust.

Yep. It's "robust" in the sense that KVM won't panic the host. It's not
robust from the perspective that it could possibly hose the guest. But, no
sane, well-behaved guest should reach that particular emulator path on a
split-lock enabled system.

> [1] https://lore.kernel.org/lkml/[email protected]/

2020-04-02 16:47:06

by Nadav Amit

[permalink] [raw]
Subject: Re: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

> On Apr 2, 2020, at 8:30 AM, Sean Christopherson <[email protected]> wrote:
>
> On Thu, Apr 02, 2020 at 02:33:00PM +0200, Thomas Gleixner wrote:
>> Without at least minimal handling for split lock detection induced #AC, VMX
>> will just run into the same problem as the VMWare hypervisor, which was
>> reported by Kenneth.
>>
>> It will inject the #AC blindly into the guest whether the guest is prepared
>> or not.
>>
>> Add the minimal required handling for it:
>>
>> - Check guest state whether CR0.AM is enabled and EFLAGS.AC is set. If
>> so, then the #AC originated from CPL3 and the guest has is prepared to
>> handle it. In this case it does not matter whether the #AC is due to a
>> split lock or a regular unaligned check.
>>
>> - Invoke a minimal split lock detection handler. If the host SLD mode is
>> sld_warn, then handle it in the same way as user space handling works:
>> Emit a warning, disable SLD and mark the current task with TIF_SLD.
>> With that resume the guest without injecting #AC.
>>
>> If the host mode is sld_fatal or sld_off, emit a warning and deliver
>> the exception to user space which can crash and burn itself.
>>
>> Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
>> force SLD off.
>
> Some comments below. But, any objection to taking Xiaoyao's patches that
> do effectively the same things, minus the MOD_INFO()? I'll repost them in
> reply to this thread.

IIUC they also deal with emulated split-lock accesses in the host, during
instruction emulation [1]. This seems also to be required, although I am not
sure the approach that he took once emulation encounters a split-lock is
robust.

[1] https://lore.kernel.org/lkml/[email protected]/

2020-04-02 16:52:32

by Sean Christopherson

[permalink] [raw]
Subject: [PATCH 1/3] KVM: x86: Emulate split-lock access as a write in emulator

From: Xiaoyao Li <[email protected]>

Emulate split-lock accesses as writes if split lock detection is on to
avoid #AC during emulation, which will result in a panic(). This should
never occur for a well behaved guest, but a malicious guest can
manipulate the TLB to trigger emulation of a locked instruction[1].

More discussion can be found [2][3].

[1] https://lkml.kernel.org/r/[email protected]
[2] https://lkml.kernel.org/r/[email protected]
[3] https://lkml.kernel.org/r/[email protected]

Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Xiaoyao Li <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/x86.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf8564d73fc3..37ce0fc9a62d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5875,6 +5875,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
{
struct kvm_host_map map;
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ u64 page_line_mask;
gpa_t gpa;
char *kaddr;
bool exchanged;
@@ -5889,7 +5890,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;

- if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
+ /*
+ * Emulate the atomic as a straight write to avoid #AC if SLD is
+ * enabled in the host and the access splits a cache line.
+ */
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ page_line_mask = ~(cache_line_size() - 1);
+ else
+ page_line_mask = PAGE_MASK;
+
+ if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
goto emul_write;

if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
--
2.24.1

2020-04-02 16:54:29

by Sean Christopherson

[permalink] [raw]
Subject: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

From: Xiaoyao Li <[email protected]>

Two types #AC can be generated in Intel CPUs:
1. legacy alignment check #AC
2. split lock #AC

Reflect #AC back into the guest if the guest has legacy alignment checks
enabled or if SLD is disabled. If SLD is enabled, treat the guest like
a host userspace application by calling handle_user_split_lock(). If
the #AC is handled (SLD disabled and TIF_SLD set), then simply resume
the guest. If the #AC isn't handled, i.e. host is sld_fatal, then
forward the #AC to the userspace VMM, similar to sending SIGBUS.

Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Xiaoyao Li <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++---
1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 458e684dfbdc..a96cfda0a5b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4623,6 +4623,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
return 1;
}

+static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)
+{
+ return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+ (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
+}
+
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4688,9 +4694,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
return handle_rmode_exception(vcpu, ex_no, error_code);

switch (ex_no) {
- case AC_VECTOR:
- kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
- return 1;
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
@@ -4719,6 +4722,27 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
+ case AC_VECTOR:
+ /*
+ * Reflect #AC to the guest if it's expecting the #AC, i.e. has
+ * legacy alignment check enabled. Pre-check host split lock
+ * turned on to avoid the VMREADs needed to check legacy #AC,
+ * i.e. reflect the #AC if the only possible source is legacy
+ * alignment checks.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
+ guest_cpu_alignment_check_enabled(vcpu)) {
+ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+ return 1;
+ }
+
+ /*
+ * Forward the #AC to userspace if kernel policy does not allow
+ * temporarily disabling split lock detection.
+ */
+ if (handle_user_split_lock(kvm_rip_read(vcpu)))
+ return 1;
+ fallthrough;
default:
kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
kvm_run->ex.exception = ex_no;
--
2.24.1

2020-04-02 16:54:49

by Sean Christopherson

[permalink] [raw]
Subject: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

From: Xiaoyao Li <[email protected]>

In the future, KVM will use handle_user_split_lock() to handle #AC
caused by split lock in guest. Due to the fact that KVM doesn't have
a @regs context and will pre-check EFLAGS.AC, move the EFLAGS.AC check
to do_alignment_check().

Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Xiaoyao Li <[email protected]>
Reviewed-by: Tony Luck <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/include/asm/cpu.h | 4 ++--
arch/x86/kernel/cpu/intel.c | 7 ++++---
arch/x86/kernel/traps.c | 2 +-
3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index ff6f3ca649b3..ff567afa6ee1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -43,11 +43,11 @@ unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
-extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+extern bool handle_user_split_lock(unsigned long ip);
#else
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
-static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+static inline bool handle_user_split_lock(unsigned long ip)
{
return false;
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9a26e972cdea..7688f51aabdb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1066,13 +1066,13 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}

-bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+bool handle_user_split_lock(unsigned long ip)
{
- if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
+ if (sld_state == sld_fatal)
return false;

pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, regs->ip);
+ current->comm, current->pid, ip);

/*
* Disable the split lock detection for this task so it can make
@@ -1083,6 +1083,7 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code)
set_tsk_thread_flag(current, TIF_SLD);
return true;
}
+EXPORT_SYMBOL_GPL(handle_user_split_lock);

/*
* This function is called only when switching between tasks with
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d54cffdc7cac..55902c5bf0aa 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -304,7 +304,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)

local_irq_enable();

- if (handle_user_split_lock(regs, error_code))
+ if (!(regs->flags & X86_EFLAGS_AC) && handle_user_split_lock(regs->ip))
return;

do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
--
2.24.1

2020-04-02 17:09:47

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [patch 2/2] x86/kvm/vmx: Prevent split lock detection induced #AC wreckage

Sean Christopherson <[email protected]> writes:
> On Thu, Apr 02, 2020 at 02:33:00PM +0200, Thomas Gleixner wrote:
>> Mark the module with MOD_INFO(sld_safe, "Y") so the module loader does not
>> force SLD off.
>
> Some comments below. But, any objection to taking Xiaoyao's patches that
> do effectively the same things, minus the MOD_INFO()? I'll repost them in
> reply to this thread.

If they are sane, I don't have a problem. But TBH, I really couldn't be
bothered to actually scan my mails whether there surfaced something sane
by now. Writing that up was just faster :)

I'll have look.

>> +static bool guest_handles_ac(struct kvm_vcpu *vcpu)
>> +{
>> + /*
>> + * If guest has alignment checking enabled in CR0 and activated in
>> + * eflags, then the #AC originated from CPL3 and the guest is able
>> + * to handle it. It does not matter whether this is a regular or
>> + * a split lock operation induced #AC.
>> + */
>> + if (vcpu->arch.cr0 & X86_CR0_AM &&
>
> Technically not required since KVM doesn't let the gets toggle CR0.AM at
> will, but going through kvm_read_cr0{_bits}() is preferred.

You're the expert here.

>> + vmx_get_rflags(vcpu) & X86_EFLAGS_AC)
>
> I don't think this is correct. A guest could trigger a split-lock #AC at
> CPL0 with EFLAGS.AC=1 and CR0.AM=1, and then panic because it didn't expect
> #AC at CPL0.

Indeed.

Thanks,

tglx

2020-04-02 17:41:12

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
> Sean Christopherson <[email protected]> writes:
> > @@ -4623,6 +4623,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
> > return 1;
> > }
> >
> > +static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)
>
> I used a different function name intentionally so the check for 'guest
> want's split lock #AC' can go there as well once it's sorted.

Heh, IIRC, I advised Xiaoyao to do the opposite so that the injection logic
in the #AC case statement was more or less complete without having to dive
into the helper, e.g. the resulting code looks like this once split-lock is
exposed to the guest:

if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
guest_cpu_alignment_check_enabled(vcpu) ||
guest_cpu_sld_on(vmx)) {
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
return 1;
}

> > +{
> > + return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> > + (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
> > +}
> > +
> > static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> > {
> > struct vcpu_vmx *vmx = to_vmx(vcpu);
> > @@ -4688,9 +4694,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> > return handle_rmode_exception(vcpu, ex_no, error_code);
> >
> > switch (ex_no) {
> > - case AC_VECTOR:
> > - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> > - return 1;
> > case DB_VECTOR:
> > dr6 = vmcs_readl(EXIT_QUALIFICATION);
> > if (!(vcpu->guest_debug &
> > @@ -4719,6 +4722,27 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> > kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> > kvm_run->debug.arch.exception = ex_no;
> > break;
> > + case AC_VECTOR:
> > + /*
> > + * Reflect #AC to the guest if it's expecting the #AC, i.e. has
> > + * legacy alignment check enabled. Pre-check host split lock
> > + * turned on to avoid the VMREADs needed to check legacy #AC,
> > + * i.e. reflect the #AC if the only possible source is legacy
> > + * alignment checks.
> > + */
> > + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
>
> I think the right thing to do here is to make this really independent of
> that feature, i.e. inject the exception if
>
> (CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))
>
> iow. when its really clear that the guest asked for it. If there is an
> actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
> something is badly wrong and the thing should just die. That's why I
> separated handle_guest_split_lock() and tell about that case.

That puts KVM in a weird spot if/when intercepting #AC is no longer
necessary, e.g. "if" future CPUs happen to gain a feature that traps into
the hypervisor (KVM) if a potential near-infinite ucode loop is detected.

The only reason KVM intercepts #AC (before split-lock) is to prevent a
malicious guest from executing a DoS attack on the host by putting the #AC
handler in ring 3. Current CPUs will get stuck in ucode vectoring #AC
faults more or less indefinitely, e.g. long enough to trigger watchdogs in
the host.

Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
would lead to divergent behavior if KVM chose to not intercept #AC, e.g.
some theoretical unknown #AC source would conditionally result in exits to
userspace depending on whether or not KVM wanted to intercept #AC for
other reasons.

That's why we went with the approach of reflecting #AC unless KVM detected
that the #AC was host-induced.

2020-04-02 18:08:26

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

Sean Christopherson <[email protected]> writes:
> From: Xiaoyao Li <[email protected]>
>
> In the future, KVM will use handle_user_split_lock() to handle #AC
> caused by split lock in guest. Due to the fact that KVM doesn't have
> a @regs context and will pre-check EFLAGS.AC, move the EFLAGS.AC check
> to do_alignment_check().
>
> Suggested-by: Sean Christopherson <[email protected]>
> Signed-off-by: Xiaoyao Li <[email protected]>
> Reviewed-by: Tony Luck <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/include/asm/cpu.h | 4 ++--
> arch/x86/kernel/cpu/intel.c | 7 ++++---
> arch/x86/kernel/traps.c | 2 +-
> 3 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
> index ff6f3ca649b3..ff567afa6ee1 100644
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -43,11 +43,11 @@ unsigned int x86_stepping(unsigned int sig);
> #ifdef CONFIG_CPU_SUP_INTEL
> extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
> extern void switch_to_sld(unsigned long tifn);
> -extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
> +extern bool handle_user_split_lock(unsigned long ip);
> #else
> static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
> static inline void switch_to_sld(unsigned long tifn) {}
> -static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +static inline bool handle_user_split_lock(unsigned long ip)

This is necessary because VMX can be compiled without CPU_SUP_INTEL?

> {
> return false;
> }
> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> index 9a26e972cdea..7688f51aabdb 100644
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -1066,13 +1066,13 @@ static void split_lock_init(void)
> split_lock_verify_msr(sld_state != sld_off);
> }
>
> -bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +bool handle_user_split_lock(unsigned long ip)
> {
> - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> + if (sld_state == sld_fatal)
> return false;
>
> pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
> - current->comm, current->pid, regs->ip);
> + current->comm, current->pid, ip);

So this returns true even in the case that sld_state == off.

Should never happen, but I rather have an extra check and be both
verbose and correct. See the variant I did.

Thanks,

tglx


2020-04-02 18:12:56

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Sean Christopherson <[email protected]> writes:
> @@ -4623,6 +4623,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static inline bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)

I used a different function name intentionally so the check for 'guest
want's split lock #AC' can go there as well once it's sorted.

> +{
> + return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> + (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
> +}
> +
> static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4688,9 +4694,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> return handle_rmode_exception(vcpu, ex_no, error_code);
>
> switch (ex_no) {
> - case AC_VECTOR:
> - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> - return 1;
> case DB_VECTOR:
> dr6 = vmcs_readl(EXIT_QUALIFICATION);
> if (!(vcpu->guest_debug &
> @@ -4719,6 +4722,27 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> kvm_run->debug.arch.exception = ex_no;
> break;
> + case AC_VECTOR:
> + /*
> + * Reflect #AC to the guest if it's expecting the #AC, i.e. has
> + * legacy alignment check enabled. Pre-check host split lock
> + * turned on to avoid the VMREADs needed to check legacy #AC,
> + * i.e. reflect the #AC if the only possible source is legacy
> + * alignment checks.
> + */
> + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||

I think the right thing to do here is to make this really independent of
that feature, i.e. inject the exception if

(CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))

iow. when its really clear that the guest asked for it. If there is an
actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
something is badly wrong and the thing should just die. That's why I
separated handle_guest_split_lock() and tell about that case.

Thanks,

tglx


2020-04-02 18:13:12

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

On Thu, Apr 02, 2020 at 07:01:56PM +0200, Thomas Gleixner wrote:
> Sean Christopherson <[email protected]> writes:
> > From: Xiaoyao Li <[email protected]>
> >
> > In the future, KVM will use handle_user_split_lock() to handle #AC
> > caused by split lock in guest. Due to the fact that KVM doesn't have
> > a @regs context and will pre-check EFLAGS.AC, move the EFLAGS.AC check
> > to do_alignment_check().
> >
> > Suggested-by: Sean Christopherson <[email protected]>
> > Signed-off-by: Xiaoyao Li <[email protected]>
> > Reviewed-by: Tony Luck <[email protected]>
> > Signed-off-by: Sean Christopherson <[email protected]>
> > ---
> > arch/x86/include/asm/cpu.h | 4 ++--
> > arch/x86/kernel/cpu/intel.c | 7 ++++---
> > arch/x86/kernel/traps.c | 2 +-
> > 3 files changed, 7 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
> > index ff6f3ca649b3..ff567afa6ee1 100644
> > --- a/arch/x86/include/asm/cpu.h
> > +++ b/arch/x86/include/asm/cpu.h
> > @@ -43,11 +43,11 @@ unsigned int x86_stepping(unsigned int sig);
> > #ifdef CONFIG_CPU_SUP_INTEL
> > extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
> > extern void switch_to_sld(unsigned long tifn);
> > -extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
> > +extern bool handle_user_split_lock(unsigned long ip);
> > #else
> > static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
> > static inline void switch_to_sld(unsigned long tifn) {}
> > -static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> > +static inline bool handle_user_split_lock(unsigned long ip)
>
> This is necessary because VMX can be compiled without CPU_SUP_INTEL?

Ya, it came about when cleaning up the IA32_FEATURE_CONTROL MSR handling
to consolidate duplicate code.

config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL

config IA32_FEAT_CTL
def_bool y
depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN

2020-04-02 19:35:19

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

Sean Christopherson <[email protected]> writes:
> On Thu, Apr 02, 2020 at 07:01:56PM +0200, Thomas Gleixner wrote:
>> > static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
>> > static inline void switch_to_sld(unsigned long tifn) {}
>> > -static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
>> > +static inline bool handle_user_split_lock(unsigned long ip)
>>
>> This is necessary because VMX can be compiled without CPU_SUP_INTEL?
>
> Ya, it came about when cleaning up the IA32_FEATURE_CONTROL MSR handling
> to consolidate duplicate code.
>
> config KVM_INTEL
> tristate "KVM for Intel (and compatible) processors support"
> depends on KVM && IA32_FEAT_CTL
>
> config IA32_FEAT_CTL
> def_bool y
> depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN

Ah, indeed. So something like the below would make sense. Hmm?

Of course that can be mangled into Xiaoyao's patches, I'm not worried
about my patch count :)

Aside of that I really wish Intel HW folks had indicated the source of
the #AC via the error code. It can only be 0 or 1 for the regular #AC so
there would have been 31 bits to chose from.

Thanks,

tglx

8<----------------
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -43,14 +43,14 @@ unsigned int x86_stepping(unsigned int s
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
-extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+extern int handle_ac_split_lock(unsigned long ip);
extern void split_lock_validate_module_text(struct module *me, void *text, void *text_end);
#else
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
-static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+static int handle_ac_split_lock(unsigned long ip)
{
- return false;
+ return -ENOSYS;
}
static inline void split_lock_validate_module_text(struct module *me, void *text, void *text_end) {}
#endif

--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1102,13 +1102,20 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}

-bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+int handle_ac_split_lock(unsigned long ip)
{
- if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
- return false;
+ switch (sld_state) {
+ case sld_warn:
+ break;
+ case sld_off:
+ pr_warn_once("#AC: Spurious trap at address: 0x%lx\n", ip);
+ return -ENOSYS;
+ case sld_fatal:
+ return -EFAULT;
+ }

pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, regs->ip);
+ current->comm, current->pid, ip);

/*
* Disable the split lock detection for this task so it can make
@@ -1117,8 +1124,9 @@ bool handle_user_split_lock(struct pt_re
*/
sld_update_msr(false);
set_tsk_thread_flag(current, TIF_SLD);
- return true;
+ return 0;
}
+EXPORT_SYMBOL_GPL(handle_ac_split_lock);

/*
* This function is called only when switching between tasks with
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -304,7 +304,7 @@ dotraplinkage void do_alignment_check(st

local_irq_enable();

- if (handle_user_split_lock(regs, error_code))
+ if (!(regs->flags & X86_EFLAGS_AC) && !handle_ac_split_lock(regs->ip))
return;

do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -65,6 +65,7 @@

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+MODULE_INFO(sld_safe, "Y");

#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
@@ -4623,6 +4624,22 @@ static int handle_machine_check(struct k
return 1;
}

+static bool guest_handles_ac(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If guest has alignment checking enabled in CR0 and activated in
+ * eflags, then the #AC originated from CPL3 and the guest is able
+ * to handle it. It does not matter whether this is a regular or
+ * a split lock operation induced #AC.
+ */
+ if (vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+ kvm_get_rflags(vcpu) & X86_EFLAGS_AC)
+ return true;
+
+ /* Add guest SLD handling checks here once it's supported */
+ return false;
+}
+
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4630,6 +4647,7 @@ static int handle_exception_nmi(struct k
u32 intr_info, ex_no, error_code;
unsigned long cr2, rip, dr6;
u32 vect_info;
+ int err;

vect_info = vmx->idt_vectoring_info;
intr_info = vmx->exit_intr_info;
@@ -4688,9 +4706,6 @@ static int handle_exception_nmi(struct k
return handle_rmode_exception(vcpu, ex_no, error_code);

switch (ex_no) {
- case AC_VECTOR:
- kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
- return 1;
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
@@ -4719,6 +4734,29 @@ static int handle_exception_nmi(struct k
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
+ case AC_VECTOR:
+ if (guest_handles_ac(vcpu)) {
+ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+ return 1;
+ }
+ /*
+ * Handle #AC caused by split lock detection. If the host
+ * mode is sld_warn, then it warns, marks current with
+ * TIF_SLD and disables split lock detection. So the guest
+ * can just continue.
+ *
+ * If the host mode is fatal, the handling code warned. Let
+ * qemu kill itself.
+ *
+ * If the host mode is off, then this #AC is bonkers and
+ * something is badly wrong. Let it fail as well.
+ */
+ err = handle_ac_split_lock(kvm_rip_read(vcpu));
+ if (!err)
+ return 1;
+ /* Propagate the error type to user space */
+ error_code = err == -EFAULT ? 0x100 : 0x200;
+ fallthrough;
default:
kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
kvm_run->ex.exception = ex_no;

2020-04-02 20:36:31

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Sean,

Sean Christopherson <[email protected]> writes:
> On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
>> Sean Christopherson <[email protected]> writes:
>> > + case AC_VECTOR:
>> > + /*
>> > + * Reflect #AC to the guest if it's expecting the #AC, i.e. has
>> > + * legacy alignment check enabled. Pre-check host split lock
>> > + * turned on to avoid the VMREADs needed to check legacy #AC,
>> > + * i.e. reflect the #AC if the only possible source is legacy
>> > + * alignment checks.
>> > + */
>> > + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ||
>>
>> I think the right thing to do here is to make this really independent of
>> that feature, i.e. inject the exception if
>>
>> (CPL==3 && CR0.AM && EFLAGS.AC) || (FUTURE && (GUEST_TEST_CTRL & SLD))
>>
>> iow. when its really clear that the guest asked for it. If there is an
>> actual #AC with SLD disabled and !(CPL==3 && CR0.AM && EFLAGS.AC) then
>> something is badly wrong and the thing should just die. That's why I
>> separated handle_guest_split_lock() and tell about that case.
>
> That puts KVM in a weird spot if/when intercepting #AC is no longer
> necessary, e.g. "if" future CPUs happen to gain a feature that traps into
> the hypervisor (KVM) if a potential near-infinite ucode loop is detected.
>
> The only reason KVM intercepts #AC (before split-lock) is to prevent a
> malicious guest from executing a DoS attack on the host by putting the #AC
> handler in ring 3. Current CPUs will get stuck in ucode vectoring #AC
> faults more or less indefinitely, e.g. long enough to trigger watchdogs in
> the host.

Which is thankfully well documented in the VMX code and the
corresponding chapter in the SDM.

> Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
> would lead to divergent behavior if KVM chose to not intercept #AC, e.g.

AFAICT, #AC is not really something which is performance relevant, but I
might obviously be uninformed on that.

Assumed it is not, then there is neither a hard requirement nor a real
incentive to give up on intercepting #AC even when future CPUs have a
fix for the above wreckage.

> some theoretical unknown #AC source would conditionally result in exits to
> userspace depending on whether or not KVM wanted to intercept #AC for
> other reasons.

I'd rather like to know when there is an unknown #AC source instead of
letting the guest silently swallow it.

TBH, the more I learn about this, the more I tend to just give up on
this whole split lock stuff in its current form and wait until HW folks
provide something which is actually usable:

- Per thread
- Properly distinguishable from a regular #AC via error code

OTOH, that means I won't be able to use it before retirement. Oh well.

Thanks,

tglx

2020-04-02 20:38:24

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest



> On Apr 2, 2020, at 1:07 PM, Thomas Gleixner <[email protected]> wrote:
>

>
>
> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
>
> - Per thread
> - Properly distinguishable from a regular #AC via error code

Why the latter? I would argue that #AC from CPL3 with EFLAGS.AC set is almost by construction not a split lock. In particular, if you meet these conditions, how exactly can you do a split lock without simultaneously triggering an alignment check? (Maybe CMPXCHG16B?

>
> OTOH, that means I won't be able to use it before retirement. Oh well.
>
> Thanks,
>
> tglx

2020-04-02 20:52:51

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:
> Sean Christopherson <[email protected]> writes:
> > On Thu, Apr 02, 2020 at 07:19:44PM +0200, Thomas Gleixner wrote:
> >> Sean Christopherson <[email protected]> writes:
> > That puts KVM in a weird spot if/when intercepting #AC is no longer
> > necessary, e.g. "if" future CPUs happen to gain a feature that traps into
> > the hypervisor (KVM) if a potential near-infinite ucode loop is detected.
> >
> > The only reason KVM intercepts #AC (before split-lock) is to prevent a
> > malicious guest from executing a DoS attack on the host by putting the #AC
> > handler in ring 3. Current CPUs will get stuck in ucode vectoring #AC
> > faults more or less indefinitely, e.g. long enough to trigger watchdogs in
> > the host.
>
> Which is thankfully well documented in the VMX code and the
> corresponding chapter in the SDM.
>
> > Injecting #AC if and only if KVM is 100% certain the guest wants the #AC
> > would lead to divergent behavior if KVM chose to not intercept #AC, e.g.
>
> AFAICT, #AC is not really something which is performance relevant, but I
> might obviously be uninformed on that.
>
> Assumed it is not, then there is neither a hard requirement nor a real
> incentive to give up on intercepting #AC even when future CPUs have a
> fix for the above wreckage.

Agreed that there's no hard requirement, but general speaking, the less KVM
needs to poke into the guest the better.

> > some theoretical unknown #AC source would conditionally result in exits to
> > userspace depending on whether or not KVM wanted to intercept #AC for
> > other reasons.
>
> I'd rather like to know when there is an unknown #AC source instead of
> letting the guest silently swallow it.

Trying to prevent the guest from squashing a spurious fault is a fools
errand. For example, with nested virtualization, the correct behavior
from an architectural perspective is to forward exceptions from L2 (the
nested VM) to L1 (the direct VM) that L1 wants to intercept. E.g. if L1
wants to intercept #AC faults that happen in L2, then KVM reflects all #AC
faults into L1 as VM-Exits without ever reaching this code.

Nested virt aside, detecting spurious #AC and a few other exceptions is
mostly feasible, but for many exceptions it's flat out impossible.

Anyways, this particular case isn't a sticking point, i.e. I'd be ok with
exiting to userspace on a spurious #AC, I just don't see the value in doing
so. Returning KVM_EXIT_EXCEPTION doesn't necessarily equate to throwing up
a red flag, e.g. from a kernel perspective you'd still be relying on the
userspace VMM to report the error in a sane manner. I think at one point
Xiaoyao had a WARN_ON for a spurious #AC, but it was removed because the
odds of a false positive due to some funky corner case seemed higher than
detecting a CPU bug.

> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
>
> - Per thread
> - Properly distinguishable from a regular #AC via error code
>
> OTOH, that means I won't be able to use it before retirement. Oh well.
>
> Thanks,
>
> tglx

2020-04-02 20:57:09

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:

If we're doing wish lists, I have one more ;-)

> TBH, the more I learn about this, the more I tend to just give up on
> this whole split lock stuff in its current form and wait until HW folks
> provide something which is actually usable:
>
> - Per thread
> - Properly distinguishable from a regular #AC via error code

- is an actual alignment check.

That is, disallow all unaligned LOCK prefixed ops, not just those that
happen to straddle a cacheline.

2020-04-02 22:28:37

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Sean,

Sean Christopherson <[email protected]> writes:
> On Thu, Apr 02, 2020 at 10:07:07PM +0200, Thomas Gleixner wrote:
>> Sean Christopherson <[email protected]> writes:
>> AFAICT, #AC is not really something which is performance relevant, but I
>> might obviously be uninformed on that.
>>
>> Assumed it is not, then there is neither a hard requirement nor a real
>> incentive to give up on intercepting #AC even when future CPUs have a
>> fix for the above wreckage.
>
> Agreed that there's no hard requirement, but general speaking, the less KVM
> needs to poke into the guest the better.

Fair enough.

>> > some theoretical unknown #AC source would conditionally result in exits to
>> > userspace depending on whether or not KVM wanted to intercept #AC for
>> > other reasons.
>>
>> I'd rather like to know when there is an unknown #AC source instead of
>> letting the guest silently swallow it.
>
> Trying to prevent the guest from squashing a spurious fault is a fools
> errand. For example, with nested virtualization, the correct behavior
> from an architectural perspective is to forward exceptions from L2 (the
> nested VM) to L1 (the direct VM) that L1 wants to intercept. E.g. if L1
> wants to intercept #AC faults that happen in L2, then KVM reflects all #AC
> faults into L1 as VM-Exits without ever reaching this code.

Which means L1 should have some handling for that case at least those L1
hypervisors which we can fix. If we want to go there.

> Anyways, this particular case isn't a sticking point, i.e. I'd be ok with
> exiting to userspace on a spurious #AC, I just don't see the value in doing
> so. Returning KVM_EXIT_EXCEPTION doesn't necessarily equate to throwing up
> a red flag, e.g. from a kernel perspective you'd still be relying on the
> userspace VMM to report the error in a sane manner. I think at one point
> Xiaoyao had a WARN_ON for a spurious #AC, but it was removed because the
> odds of a false positive due to some funky corner case seemed higher than
> detecting a CPU bug.

Agreed. Relying on the user space side to crash and burn the guest is
probably wishful thinking. So the right thing might be to just kill it
right at the spot.

But coming back to the above discussion:

if (!cpu_has(SLD) || guest_wants_regular_ac()) {
kvm_queue_exception_e();
return 1;
}

vs.

if (guest_wants_regular_ac()) {
kvm_queue_exception_e();
return 1;
}

The only situation where this makes a difference is when the CPU does
not support SLD. If it does the thing became ambiguous today.

With my previous attempt to bring sanity into this by not setting the
feature flag when SLD is disabled at the command line, the check is
consistent.

But the detection of unaware hypervisors with the module scan brings us
into a situation where we have sld_state == sld_off and the feature flag
being set because we can't undo it anymore.

So now you load VMWare which disables SLD, but the feature bit stays and
then when you unload it and load VMX afterwards then you have exactly
the same situation as with the feature check removed. Consistency gone.

So with that we might want to go back to the sld_state check instead of
the cpu feature check, but that does not make it more consistent:

As I just verified, it's possible to load the vmware module parallel
to the KVM/VMX one.

So either we deal with it in some way or just decide that SLD and HV
modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
when SLD is enabled on the host. I'm fine with the latter :)

What a mess.

Thanks,

tglx

2020-04-02 22:42:09

by Nadav Amit

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

> On Apr 2, 2020, at 3:27 PM, Thomas Gleixner <[email protected]> wrote:
>
> As I just verified, it's possible to load the vmware module parallel
> to the KVM/VMX one.
>
> So either we deal with it in some way or just decide that SLD and HV
> modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
> when SLD is enabled on the host. I'm fine with the latter :)
>
> What a mess.

[ +Doug ]

Just to communicate the information that was given to me: we do intend to
fix the SLD issue in VMware and if needed to release a minor version that
addresses it. Having said that, there are other hypervisors, such as
virtualbox or jailhouse, which would have a similar issue.

2020-04-02 23:06:46

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Nadav Amit <[email protected]> writes:
> Just to communicate the information that was given to me: we do intend to
> fix the SLD issue in VMware and if needed to release a minor version that
> addresses it. Having said that, there are other hypervisors, such as
> virtualbox or jailhouse, which would have a similar issue.

I'm well aware of that and even if VMWare fixes this, this still will
trip up users which fail to install updates for one reason or the other
and leave them puzzled. Maybe we just should not care at all.

Despite that I might have mentioned it before: What a mess ...

Thanks,

tglx

2020-04-02 23:10:47

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

On Thu, 2 Apr 2020 22:40:03 +0000
Nadav Amit <[email protected]> wrote:

> > On Apr 2, 2020, at 3:27 PM, Thomas Gleixner <[email protected]> wrote:
> >
> > As I just verified, it's possible to load the vmware module parallel
> > to the KVM/VMX one.
> >
> > So either we deal with it in some way or just decide that SLD and HV
> > modules which do not have the MOD_INFO(sld_safe) magic cannot be loaded
> > when SLD is enabled on the host. I'm fine with the latter :)
> >
> > What a mess.
>
> [ +Doug ]
>
> Just to communicate the information that was given to me: we do intend to
> fix the SLD issue in VMware and if needed to release a minor version that
> addresses it. Having said that, there are other hypervisors, such as
> virtualbox or jailhouse, which would have a similar issue.

If we go the approach of not letting VM modules load if it doesn't have the
sld_safe flag set, how is this different than a VM module not loading due
to kabi breakage?

If we prevent it from loading (and keeping from having to go into this
inconsistent state that Thomas described), it would encourage people to get
the latest modules, and the maintainers of said modules motivation to
update them.

-- Steve

2020-04-02 23:18:30

by Kenneth Crudup

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest


On Thu, 2 Apr 2020, Steven Rostedt wrote:

> If we go the approach of not letting VM modules load if it doesn't have the
> sld_safe flag set, how is this different than a VM module not loading due
> to kabi breakage?

Why not a compromise: if such a module is attempted to be loaded, print up
a message saying something akin to "turn the parameter 'split_lock_detect'
off" as we reject loading it- and if we see that we've booted with it off
just splat a WARN_ON() if someone tries to load such modules?

-Kenny

--
Kenneth R. Crudup Sr. SW Engineer, Scott County Consulting, Silicon Valley

2020-04-03 00:19:24

by Jim Mattson

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

On Thu, Apr 2, 2020 at 4:16 PM Kenneth R. Crudup <[email protected]> wrote:
>
>
> On Thu, 2 Apr 2020, Steven Rostedt wrote:
>
> > If we go the approach of not letting VM modules load if it doesn't have the
> > sld_safe flag set, how is this different than a VM module not loading due
> > to kabi breakage?
>
> Why not a compromise: if such a module is attempted to be loaded, print up
> a message saying something akin to "turn the parameter 'split_lock_detect'
> off" as we reject loading it- and if we see that we've booted with it off
> just splat a WARN_ON() if someone tries to load such modules?

What modules are we talking about? I thought we were discussing L1
hypervisors, which are just binary blobs. The only modules at the L0
level are kvm and kvm_intel.

2020-04-03 12:17:16

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 3/3] KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in guest

Jim,

Jim Mattson <[email protected]> writes:
> On Thu, Apr 2, 2020 at 4:16 PM Kenneth R. Crudup <[email protected]> wrote:
>> On Thu, 2 Apr 2020, Steven Rostedt wrote:
>>
>> > If we go the approach of not letting VM modules load if it doesn't have the
>> > sld_safe flag set, how is this different than a VM module not loading due
>> > to kabi breakage?
>>
>> Why not a compromise: if such a module is attempted to be loaded, print up
>> a message saying something akin to "turn the parameter 'split_lock_detect'
>> off" as we reject loading it- and if we see that we've booted with it off
>> just splat a WARN_ON() if someone tries to load such modules?
>
> What modules are we talking about? I thought we were discussing L1
> hypervisors, which are just binary blobs. The only modules at the L0
> level are kvm and kvm_intel.

Maybe in your world, but VmWare (which got this started), VirtualBox,
Jailhouse and who knows what else _are_ L0 hypervisors. Otherwise we
wouldn't have that conversation at all.

Thanks,

tglx


2020-04-10 04:39:59

by Xiaoyao Li

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

On 4/3/2020 3:06 AM, Thomas Gleixner wrote:
> Sean Christopherson <[email protected]> writes:
>> On Thu, Apr 02, 2020 at 07:01:56PM +0200, Thomas Gleixner wrote:
>>>> static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
>>>> static inline void switch_to_sld(unsigned long tifn) {}
>>>> -static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
>>>> +static inline bool handle_user_split_lock(unsigned long ip)
>>>
>>> This is necessary because VMX can be compiled without CPU_SUP_INTEL?
>>
>> Ya, it came about when cleaning up the IA32_FEATURE_CONTROL MSR handling
>> to consolidate duplicate code.
>>
>> config KVM_INTEL
>> tristate "KVM for Intel (and compatible) processors support"
>> depends on KVM && IA32_FEAT_CTL
>>
>> config IA32_FEAT_CTL
>> def_bool y
>> depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN
>
> Ah, indeed. So something like the below would make sense. Hmm?
>
> Of course that can be mangled into Xiaoyao's patches, I'm not worried
> about my patch count :)
>

I don't mind using yours in my next version.

Hi Paolo,

Are you OK with the kvm part below?

If no objection, I can spin the next version using tglx's.

>
> 8<----------------
> --- a/arch/x86/include/asm/cpu.h
> +++ b/arch/x86/include/asm/cpu.h
> @@ -43,14 +43,14 @@ unsigned int x86_stepping(unsigned int s
> #ifdef CONFIG_CPU_SUP_INTEL
> extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
> extern void switch_to_sld(unsigned long tifn);
> -extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
> +extern int handle_ac_split_lock(unsigned long ip);
> extern void split_lock_validate_module_text(struct module *me, void *text, void *text_end);
> #else
> static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
> static inline void switch_to_sld(unsigned long tifn) {}
> -static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +static int handle_ac_split_lock(unsigned long ip)
> {
> - return false;
> + return -ENOSYS;
> }
> static inline void split_lock_validate_module_text(struct module *me, void *text, void *text_end) {}
> #endif
>
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -1102,13 +1102,20 @@ static void split_lock_init(void)
> split_lock_verify_msr(sld_state != sld_off);
> }
>
> -bool handle_user_split_lock(struct pt_regs *regs, long error_code)
> +int handle_ac_split_lock(unsigned long ip)
> {
> - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
> - return false;
> + switch (sld_state) {
> + case sld_warn:
> + break;
> + case sld_off:
> + pr_warn_once("#AC: Spurious trap at address: 0x%lx\n", ip);
> + return -ENOSYS;
> + case sld_fatal:
> + return -EFAULT;
> + }
>
> pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
> - current->comm, current->pid, regs->ip);
> + current->comm, current->pid, ip);
>
> /*
> * Disable the split lock detection for this task so it can make
> @@ -1117,8 +1124,9 @@ bool handle_user_split_lock(struct pt_re
> */
> sld_update_msr(false);
> set_tsk_thread_flag(current, TIF_SLD);
> - return true;
> + return 0;
> }
> +EXPORT_SYMBOL_GPL(handle_ac_split_lock);
>
> /*
> * This function is called only when switching between tasks with
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -304,7 +304,7 @@ dotraplinkage void do_alignment_check(st
>
> local_irq_enable();
>
> - if (handle_user_split_lock(regs, error_code))
> + if (!(regs->flags & X86_EFLAGS_AC) && !handle_ac_split_lock(regs->ip))
> return;
>
> do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -65,6 +65,7 @@
>
> MODULE_AUTHOR("Qumranet");
> MODULE_LICENSE("GPL");
> +MODULE_INFO(sld_safe, "Y");
>
> #ifdef MODULE
> static const struct x86_cpu_id vmx_cpu_id[] = {
> @@ -4623,6 +4624,22 @@ static int handle_machine_check(struct k
> return 1;
> }
>
> +static bool guest_handles_ac(struct kvm_vcpu *vcpu)
> +{
> + /*
> + * If guest has alignment checking enabled in CR0 and activated in
> + * eflags, then the #AC originated from CPL3 and the guest is able
> + * to handle it. It does not matter whether this is a regular or
> + * a split lock operation induced #AC.
> + */
> + if (vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> + kvm_get_rflags(vcpu) & X86_EFLAGS_AC)
> + return true;
> +
> + /* Add guest SLD handling checks here once it's supported */
> + return false;
> +}
> +
> static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4630,6 +4647,7 @@ static int handle_exception_nmi(struct k
> u32 intr_info, ex_no, error_code;
> unsigned long cr2, rip, dr6;
> u32 vect_info;
> + int err;
>
> vect_info = vmx->idt_vectoring_info;
> intr_info = vmx->exit_intr_info;
> @@ -4688,9 +4706,6 @@ static int handle_exception_nmi(struct k
> return handle_rmode_exception(vcpu, ex_no, error_code);
>
> switch (ex_no) {
> - case AC_VECTOR:
> - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> - return 1;
> case DB_VECTOR:
> dr6 = vmcs_readl(EXIT_QUALIFICATION);
> if (!(vcpu->guest_debug &
> @@ -4719,6 +4734,29 @@ static int handle_exception_nmi(struct k
> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> kvm_run->debug.arch.exception = ex_no;
> break;
> + case AC_VECTOR:
> + if (guest_handles_ac(vcpu)) {
> + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> + return 1;
> + }
> + /*
> + * Handle #AC caused by split lock detection. If the host
> + * mode is sld_warn, then it warns, marks current with
> + * TIF_SLD and disables split lock detection. So the guest
> + * can just continue.
> + *
> + * If the host mode is fatal, the handling code warned. Let
> + * qemu kill itself.
> + *
> + * If the host mode is off, then this #AC is bonkers and
> + * something is badly wrong. Let it fail as well.
> + */
> + err = handle_ac_split_lock(kvm_rip_read(vcpu));
> + if (!err)
> + return 1;
> + /* Propagate the error type to user space */
> + error_code = err == -EFAULT ? 0x100 : 0x200;
> + fallthrough;
> default:
> kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
> kvm_run->ex.exception = ex_no;
>

2020-04-10 10:24:33

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/split_lock: Refactor and export handle_user_split_lock() for KVM

On 10/04/20 06:39, Xiaoyao Li wrote:
>
>   +static bool guest_handles_ac(struct kvm_vcpu *vcpu)
> +{
> +    /*
> +     * If guest has alignment checking enabled in CR0 and activated in
> +     * eflags, then the #AC originated from CPL3 and the guest is able
> +     * to handle it. It does not matter whether this is a regular or
> +     * a split lock operation induced #AC.
> +     */
> +    if (vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
> +        kvm_get_rflags(vcpu) & X86_EFLAGS_AC)
> +        return true;
> +
> +    /* Add guest SLD handling checks here once it's supported */
> +    return false;
> +}
> +
>   static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>   {
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4630,6 +4647,7 @@ static int handle_exception_nmi(struct k
>       u32 intr_info, ex_no, error_code;
>       unsigned long cr2, rip, dr6;
>       u32 vect_info;
> +    int err;
>         vect_info = vmx->idt_vectoring_info;
>       intr_info = vmx->exit_intr_info;
> @@ -4688,9 +4706,6 @@ static int handle_exception_nmi(struct k
>           return handle_rmode_exception(vcpu, ex_no, error_code);
>         switch (ex_no) {
> -    case AC_VECTOR:
> -        kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> -        return 1;
>       case DB_VECTOR:
>           dr6 = vmcs_readl(EXIT_QUALIFICATION);
>           if (!(vcpu->guest_debug &
> @@ -4719,6 +4734,29 @@ static int handle_exception_nmi(struct k
>           kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
>           kvm_run->debug.arch.exception = ex_no;
>           break;
> +    case AC_VECTOR:
> +        if (guest_handles_ac(vcpu)) {
> +            kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
> +            return 1;
> +        }
> +        /*
> +         * Handle #AC caused by split lock detection. If the host
> +         * mode is sld_warn, then it warns, marks current with
> +         * TIF_SLD and disables split lock detection. So the guest
> +         * can just continue.
> +         *
> +         * If the host mode is fatal, the handling code warned. Let
> +         * qemu kill itself.
> +         *
> +         * If the host mode is off, then this #AC is bonkers and
> +         * something is badly wrong. Let it fail as well.
> +         */
> +        err = handle_ac_split_lock(kvm_rip_read(vcpu));
> +        if (!err)
> +            return 1;
> +        /* Propagate the error type to user space */
> +        error_code = err == -EFAULT ? 0x100 : 0x200;
> +        fallthrough;


Acked-by: Paolo Bonzini <[email protected]>

2020-04-10 10:25:36

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 0/3] x86: KVM: VMX: Add basic split-lock #AC handling

On 02/04/20 17:55, Sean Christopherson wrote:
> First three patches from Xiaoyao's series to add split-lock #AC support
> in KVM.
>
> Xiaoyao Li (3):
> KVM: x86: Emulate split-lock access as a write in emulator
> x86/split_lock: Refactor and export handle_user_split_lock() for KVM
> KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in
> guest

Sorry I was out of the loop on this (I'm working part time and it's a
mess). Sean, can you send the patches as a top-level message? I'll
queue them and get them to Linus over the weekend.

Paolo

2020-04-10 11:18:03

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 0/3] x86: KVM: VMX: Add basic split-lock #AC handling

Paolo Bonzini <[email protected]> writes:
> On 02/04/20 17:55, Sean Christopherson wrote:
>> First three patches from Xiaoyao's series to add split-lock #AC support
>> in KVM.
>>
>> Xiaoyao Li (3):
>> KVM: x86: Emulate split-lock access as a write in emulator
>> x86/split_lock: Refactor and export handle_user_split_lock() for KVM
>> KVM: VMX: Extend VMX's #AC interceptor to handle split lock #AC in
>> guest
>
> Sorry I was out of the loop on this (I'm working part time and it's a
> mess). Sean, can you send the patches as a top-level message? I'll
> queue them and get them to Linus over the weekend.

I have a reworked version. I'll post it after lunch.

Thanks,

tglx