vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
VMXON only while a vcpu is loaded. X86_CR4_VMXE is used as an
indication in cpu_emergency_vmxoff() (called on kdump) of whether VMXOFF
has to be called. That indication is obviously wrong if the two are
managed independently: calling VMXOFF without a previous VMXON will
result in an exception (#UD).
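
For reference, the kdump path keys off that CR4 bit alone; a sketch
modeled on arch/x86/include/asm/virtext.h of this era (the exact helper
structure may differ between kernel versions):

  /* The emergency path issues VMXOFF iff CR4.VMXE is set.  With
   * vmm_exclusive=0 and no vcpu loaded, CR4.VMXE is set but VMXON was
   * never executed, so the VMXOFF in cpu_vmxoff() raises #UD.
   */
  static inline int cpu_vmx_enabled(void)
  {
          return __read_cr4() & X86_CR4_VMXE;
  }

  static inline void __cpu_emergency_vmxoff(void)
  {
          if (cpu_vmx_enabled())
                  cpu_vmxoff();           /* executes VMXOFF */
  }
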
In addition, X86_CR4_VMXE is used in hardware_enable() as a means of
testing whether VMX is already in use by another VMM. So there can't
really be co-existence: if the other VMM is prepared for co-existence
and does a similar check, only one VMM can run at a time; if the other
VMM is not prepared and blindly sets/clears X86_CR4_VMXE, we will get
inconsistencies in X86_CR4_VMXE.
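
The ownership test referred to here is roughly the following (a sketch
of the check in hardware_enable() in vmx.c of this time):

  /* If some other VMM already set CR4.VMXE, refuse to enable VMX on
   * this CPU rather than stepping on its state.
   */
  if (cr4_read_shadow() & X86_CR4_VMXE)
          return -EBUSY;
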
As we have also had bug reports related to clearing of the VMCS with
vmm_exclusive=0, this path seems to be pretty much untested. So let's
drop it.

While at it, move setting/clearing X86_CR4_VMXE directly into
kvm_cpu_vmxon()/kvm_cpu_vmxoff().

Signed-off-by: David Hildenbrand <[email protected]>
---
arch/x86/kvm/vmx.c | 38 +++++++-------------------------------
1 file changed, 7 insertions(+), 31 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa86..bbbfe12 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -914,8 +911,6 @@ static void nested_release_page_clean(struct page *page)
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2235,15 +2230,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
if (!already_loaded) {
+ loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2321,11 +2311,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_vcpu_pi_put(vcpu);
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
- kvm_cpu_vmxoff();
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
- cr4_set_bits(X86_CR4_VMXE);
-
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
+ kvm_cpu_vmxon(phys_addr);
+ ept_sync_global();
native_store_gdt(this_cpu_ptr(&host_gdt));
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static void hardware_disable(void)
{
- if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
- kvm_cpu_vmxoff();
- }
- cr4_clear_bits(X86_CR4_VMXE);
+ vmclear_local_loaded_vmcss();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -9228,11 +9208,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
--
2.9.3
2017-03-10 12:47+0100, David Hildenbrand:
> vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
> VMXON only while a vcpu is loaded.
[...]
> As we have also had bug reports related to clearing of the VMCS with
> vmm_exclusive=0, this path seems to be pretty much untested. So let's
> drop it.

Totally,

Reviewed-by: Radim Krčmář <[email protected]>
On 14/03/2017 21:30, Radim Krčmář wrote:
> 2017-03-10 12:47+0100, David Hildenbrand:
>> vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
>> VMXON only while a vcpu is loaded.
[...]
>
> Totally,
>
> Reviewed-by: Radim Krčmář <[email protected]>

Radim,

are you putting this in kvm/queue for 4.12?

Thanks,

Paolo
On 10/03/2017 12:47, David Hildenbrand wrote:
> vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
> VMXON only while a vcpu is loaded.
[...]

Applied, thanks.

Paolo
Em Fri, Mar 10, 2017 at 12:47:13PM +0100, David Hildenbrand escreveu:
> vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
> VMXON only while a vcpu is loaded.
[...]
> While at it, move setting/clearing X86_CR4_VMXE directly into
> kvm_cpu_vmxon()/kvm_cpu_vmxoff().

Oh well, as suggested by Alexander, I was using this parameter to be
able to use Intel PT on the host on a Broadwell machine, i.e.:

  perf record -e intel_pt// usleep 1
  perf script

would show decoded Intel PT records; no more :-\ But I'm clueless about
KVM internals, so I'm just reporting the change in behaviour for this
very specific use case.

Now I don't know if this is something that would make Intel PT usable
on Broadwell machines but wouldn't be required with newer chips; I will
test with a Kaby Lake i5 7500 when back at my home office...

- Arnaldo
2017-06-21 14:48-0300, Arnaldo Carvalho de Melo:
> Em Fri, Mar 10, 2017 at 12:47:13PM +0100, David Hildenbrand escreveu:
> > vmm_exclusive=0 leads to KVM always setting X86_CR4_VMXE and calling
> > VMXON only while a vcpu is loaded.
[...]
>
> Oh well, as suggested by Alexander, I was using this parameter to be
> able to use Intel PT on the host on a Broadwell machine, i.e.:
>
>   perf record -e intel_pt// usleep 1
>   perf script

We thought that blacklisting the KVM module was a good solution ...
Were you using KVM virtual machines with vmm_exclusive=0?
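
For completeness, blacklisting here means the usual modprobe
configuration; a typical snippet (file name and comments are mine, not
quoted from anywhere in this thread):

  # /etc/modprobe.d/kvm-blacklist.conf -- keep KVM from auto-loading so
  # that VMXON is never executed and Intel PT stays usable on the host
  blacklist kvm_intel
  blacklist kvm
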
> would show decoded Intel PT records; no more :-\ But I'm clueless about
> KVM internals, so I'm just reporting the change in behaviour for this
> very specific use case.
>
> Now I don't know if this is something that would make Intel PT usable
> on Broadwell machines but wouldn't be required with newer chips; I will
> test with a Kaby Lake i5 7500 when back at my home office...

Most likely, SDM 35.2.8.2 says:

  Initial implementations of Intel Processor Trace do not support tracing
  in VMX operation. Such processors indicate this by returning 0 for
  IA32_VMX_MISC[bit 14].

so something akin to vmm_exclusive is about the only option there.

Please check whether Kaby Lake already has a more advanced
implementation, because we might need to disable PT when entering VMX
non-root mode (so that the tracing packets are not written into the
guest's memory, just like with PEBS).
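
A quick way to check a given machine is to read that MSR; a sketch (the
MSR index is from the SDM, the helper name is made up):

  /* Probe SDM 35.2.8.2's capability bit: MSR_IA32_VMX_MISC is MSR
   * 0x485, and bit 14 reads as 1 on implementations that support
   * Intel PT in VMX operation.
   */
  #define MSR_IA32_VMX_MISC               0x00000485
  #define MSR_IA32_VMX_MISC_INTEL_PT      (1ULL << 14)

  static bool pt_supported_in_vmx_operation(void)
  {
          u64 vmx_misc;

          rdmsrl(MSR_IA32_VMX_MISC, vmx_misc);
          return vmx_misc & MSR_IA32_VMX_MISC_INTEL_PT;
  }

From userspace, "rdmsr 0x485" from msr-tools shows the same raw value.
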
Thanks.