Here are the patches I mentioned while reviewing Liu Jinsong's MPX
series.
Patch 1 is a further cleanup of xcr0 handling, and patch 2 introduces
nested virtualization support for MPX.
Please review.
Thanks,
Paolo
Paolo Bonzini (2):
KVM: x86: introduce kvm_supported_xcr0()
KVM: x86: Add nested virtualization support for MPX
arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
arch/x86/kvm/vmx.c | 17 +++++++++++++++++
arch/x86/kvm/x86.c | 4 +---
arch/x86/kvm/x86.h | 2 ++
4 files changed, 36 insertions(+), 14 deletions(-)
--
1.8.3.1
XSAVE support for KVM is already using host_xcr0 & KVM_SUPPORTED_XCR0 as
a "dynamic" version of KVM_SUPPORTED_XCR0.
However, this is not enough because the MPX bits should not be presented
to the guest unless kvm_x86_ops confirms the support. So, replace all
instances of host_xcr0 & KVM_SUPPORTED_XCR0 with a new function
kvm_supported_xcr0() that also has this check.
Note that here:
if (xstate_bv & ~KVM_SUPPORTED_XCR0)
return -EINVAL;
if (xstate_bv & ~host_cr0)
return -EINVAL;
the code is equivalent to
if ((xstate_bv & ~KVM_SUPPORTED_XCR0) ||
(xstate_bv & ~host_cr0)
return -EINVAL;
i.e. "xstate_bv & (~KVM_SUPPORTED_XCR0 | ~host_cr0)" which is in turn
equal to "xstate_bv & ~(KVM_SUPPORTED_XCR0 & host_cr0)". So we should
also use the new function there.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
arch/x86/kvm/x86.c | 4 +---
arch/x86/kvm/x86.h | 2 ++
3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ddc8a7e165df..18aefb4d0927 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
return ret;
}
+u64 kvm_supported_xcr0(void)
+{
+ u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+ if (!kvm_x86_ops->mpx_supported || !kvm_x86_ops->mpx_supported())
+ xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+ return xcr0;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -73,7 +83,7 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) &
- host_xcr0 & KVM_SUPPORTED_XCR0;
+ kvm_supported_xcr0();
vcpu->arch.guest_xstate_size = best->ebx =
xstate_required_size(vcpu->arch.xcr0);
}
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags = 0;
}
-static bool supported_xcr0_bit(unsigned bit)
-{
- u64 mask = ((u64)1 << bit);
-
- return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
#define F(x) bit(X86_FEATURE_##x)
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -439,16 +442,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 0xd: {
int idx, i;
+ u64 supported = kvm_supported_xcr0();
- entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
- entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+ entry->eax &= supported;
+ entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
+ u64 mask = ((u64)1 << idx);
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
- if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+ if (entry[i].eax == 0 || !(supported & mask))
continue;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4cca45853dfe..109985016389 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3048,9 +3048,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
- if (xstate_bv & ~KVM_SUPPORTED_XCR0)
- return -EINVAL;
- if (xstate_bv & ~host_xcr0)
+ if (xstate_bv & ~kvm_supported_xcr0())
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
guest_xsave->region, vcpu->arch.guest_xstate_size);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 392ecbff0030..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -126,6 +126,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
| XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
+extern u64 kvm_supported_xcr0(void);
+
extern unsigned int min_timer_period_us;
extern struct static_key kvm_no_apic_vcpu;
--
1.8.3.1
This is simple to do, the "host" BNDCFGS is either 0 or the guest value.
However, both controls have to be present. We cannot provide MPX if
we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/vmx.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83ee24f74a69..daca80f0eae2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -202,6 +202,7 @@ struct __packed vmcs12 {
u64 guest_pdptr1;
u64 guest_pdptr2;
u64 guest_pdptr3;
+ u64 guest_bndcfgs;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
@@ -534,6 +535,7 @@ static const unsigned long shadow_read_write_fields[] = {
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
+ GUEST_BNDCFGS,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
@@ -589,6 +591,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(GUEST_PDPTR1, guest_pdptr1),
FIELD64(GUEST_PDPTR2, guest_pdptr2),
FIELD64(GUEST_PDPTR3, guest_pdptr3),
+ FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
FIELD64(HOST_IA32_PAT, host_ia32_pat),
FIELD64(HOST_IA32_EFER, host_ia32_efer),
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -2279,6 +2283,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
}
nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2292,6 +2298,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
VM_ENTRY_LOAD_IA32_PAT;
nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
VM_ENTRY_LOAD_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
set_cr4_guest_host_mask(vmx);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8277,6 +8288,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ if (vmx_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/* update exit information fields: */
@@ -8386,6 +8399,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
+ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, 0);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
--
1.8.3.1
On 2014-02-26 17:54, Paolo Bonzini wrote:
> Here are the patches I mentioned while reviewing Liu Jinsong's MPX
> series.
>
> Patch 1 is a further cleanup of xcr0 handling, and patch 2 introduces
> nested virtualization support for MPX.
>
> Please review.
>
> Thanks,
>
> Paolo
>
> Paolo Bonzini (2):
> KVM: x86: introduce kvm_supported_xcr0()
> KVM: x86: Add nested virtualization support for MPX
>
> arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
> arch/x86/kvm/vmx.c | 17 +++++++++++++++++
> arch/x86/kvm/x86.c | 4 +---
> arch/x86/kvm/x86.h | 2 ++
> 4 files changed, 36 insertions(+), 14 deletions(-)
>
Is this round different from the other v2?
Jan
--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux
Il 27/02/2014 12:19, Jan Kiszka ha scritto:
> On 2014-02-26 17:54, Paolo Bonzini wrote:
>> Here are the patches I mentioned while reviewing Liu Jinsong's MPX
>> series.
>>
>> Patch 1 is a further cleanup of xcr0 handling, and patch 2 introduces
>> nested virtualization support for MPX.
>>
>> Please review.
>>
>> Thanks,
>>
>> Paolo
>>
>> Paolo Bonzini (2):
>> KVM: x86: introduce kvm_supported_xcr0()
>> KVM: x86: Add nested virtualization support for MPX
>>
>> arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
>> arch/x86/kvm/vmx.c | 17 +++++++++++++++++
>> arch/x86/kvm/x86.c | 4 +---
>> arch/x86/kvm/x86.h | 2 ++
>> 4 files changed, 36 insertions(+), 14 deletions(-)
>>
>
> Is this round different from the other v2?
No, they're the same.
Paolo