2018-10-19 14:16:55

by Vitaly Kuznetsov

[permalink] [raw]
Subject: [PATCH] x86/kvm/nVMX: tweak shadow fields

It seems we have some leftovers from times when 'unrestricted guest'
wasn't exposed to L1. Stop shadowing GUEST_CS_{BASE,LIMIT,SELECTOR}
and GUEST_ES_BASE, shadow GUEST_SS_AR_BYTES as it was found that some
hypervisors (e.g. Hyper-V without Enlightened VMCS) access it pretty
often.

Suggested-by: Paolo Bonzini <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/kvm/vmx.c | 10 +++++-----
arch/x86/kvm/vmx_shadow_fields.h | 5 +----
2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index abeeb45d1c33..641a65b30685 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -12715,6 +12715,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
@@ -12722,6 +12723,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
@@ -12731,12 +12733,13 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
@@ -12838,11 +12841,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
*/
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
}

if (vmx->nested.nested_run_pending &&
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
index cd0c75f6d037..132432f375c2 100644
--- a/arch/x86/kvm/vmx_shadow_fields.h
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -28,7 +28,6 @@
*/

/* 16-bits */
-SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
@@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
-SHADOW_FIELD_RW(GUEST_CS_LIMIT)
SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)

@@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
-SHADOW_FIELD_RW(GUEST_CS_BASE)
-SHADOW_FIELD_RW(GUEST_ES_BASE)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)
--
2.17.2



2018-10-19 16:47:15

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm/nVMX: tweak shadow fields

On 19/10/2018 16:16, Vitaly Kuznetsov wrote:
> It seems we have some leftovers from times when 'unrestricted guest'
> wasn't exposed to L1. Stop shadowing GUEST_CS_{BASE,LIMIT,AR_SELECTOR}
> and GUEST_ES_BASE, shadow GUEST_SS_AR_BYTES as it was found that some
> hypervisors (e.g. Hyper-V without Enlightened VMCS) access it pretty
> often.
>
> Suggested-by: Paolo Bonzini <[email protected]>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>

Queued, thanks.

Paolo

> ---
> arch/x86/kvm/vmx.c | 10 +++++-----
> arch/x86/kvm/vmx_shadow_fields.h | 5 +----
> 2 files changed, 6 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index abeeb45d1c33..641a65b30685 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -12715,6 +12715,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
> HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> @@ -12722,6 +12723,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
> vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> @@ -12731,12 +12733,13 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
> + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
> vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> @@ -12838,11 +12841,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> */
> if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
> HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
> - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
> vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
> - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
> - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> }
>
> if (vmx->nested.nested_run_pending &&
> diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
> index cd0c75f6d037..132432f375c2 100644
> --- a/arch/x86/kvm/vmx_shadow_fields.h
> +++ b/arch/x86/kvm/vmx_shadow_fields.h
> @@ -28,7 +28,6 @@
> */
>
> /* 16-bits */
> -SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
> SHADOW_FIELD_RW(GUEST_INTR_STATUS)
> SHADOW_FIELD_RW(GUEST_PML_INDEX)
> SHADOW_FIELD_RW(HOST_FS_SELECTOR)
> @@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
> SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
> SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
> SHADOW_FIELD_RW(TPR_THRESHOLD)
> -SHADOW_FIELD_RW(GUEST_CS_LIMIT)
> SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
> +SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
> SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
> SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
>
> @@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
> SHADOW_FIELD_RW(GUEST_CR3)
> SHADOW_FIELD_RW(GUEST_CR4)
> SHADOW_FIELD_RW(GUEST_RFLAGS)
> -SHADOW_FIELD_RW(GUEST_CS_BASE)
> -SHADOW_FIELD_RW(GUEST_ES_BASE)
> SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
> SHADOW_FIELD_RW(CR0_READ_SHADOW)
> SHADOW_FIELD_RW(CR4_READ_SHADOW)
>


2018-11-09 22:12:24

by Jim Mattson

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm/nVMX: tweak shadow fields

I'm not convinced that the "one size fits all" and "context-free"
approaches to VMCS shadowing are terribly effective.

For example, we never shadow VMX_INSTRUCTION_INFO, but if we just
reflected an exit to L1 for which that field is defined, there's
probably a good chance that L1 will use it. We always shadow
VM_EXIT_INTR_INFO, but if we didn't just reflect exit reason 0 to L1,
it's not likely to be read. If the L2 guest is in legacy mode or
compatibility mode, L1 is much more likely to be interested in the
contents of the descriptor cache than if the guest is in 64-bit mode.

Some hypervisors write TSC_OFFSET quite frequently. Others rarely.
Last time I checked (it's been a while), VirtualBox was always
interested in everything. :-) Kvm, Hyper-V, VMware, VirtualBox,
Parallels...they all have different patterns, and they change from
release to release.

Is it worth having a set of VMCS shadowing bitmaps per-vCPU, in order
to make better use of this feature?

On Fri, Oct 19, 2018 at 9:45 AM, Paolo Bonzini <[email protected]> wrote:
> On 19/10/2018 16:16, Vitaly Kuznetsov wrote:
>> It seems we have some leftovers from times when 'unrestricted guest'
>> wasn't exposed to L1. Stop shadowing GUEST_CS_{BASE,LIMIT,AR_SELECTOR}
>> and GUEST_ES_BASE, shadow GUEST_SS_AR_BYTES as it was found that some
>> hypervisors (e.g. Hyper-V without Enlightened VMCS) access it pretty
>> often.
>>
>> Suggested-by: Paolo Bonzini <[email protected]>
>> Signed-off-by: Vitaly Kuznetsov <[email protected]>
>
> Queued, thanks.
>
> Paolo
>
>> ---
>> arch/x86/kvm/vmx.c | 10 +++++-----
>> arch/x86/kvm/vmx_shadow_fields.h | 5 +----
>> 2 files changed, 6 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index abeeb45d1c33..641a65b30685 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -12715,6 +12715,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
>> if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
>> HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
>> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
>> vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
>> vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
>> @@ -12722,6 +12723,7 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
>> vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
>> vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
>> vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
>> + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
>> vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
>> vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
>> vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
>> @@ -12731,12 +12733,13 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
>> vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
>> vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
>> vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
>> - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
>> vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
>> vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
>> vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
>> vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
>> vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
>> + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
>> + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
>> vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
>> vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
>> vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
>> @@ -12838,11 +12841,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> */
>> if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
>> HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
>> - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
>> vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
>> - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
>> - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
>> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
>> }
>>
>> if (vmx->nested.nested_run_pending &&
>> diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
>> index cd0c75f6d037..132432f375c2 100644
>> --- a/arch/x86/kvm/vmx_shadow_fields.h
>> +++ b/arch/x86/kvm/vmx_shadow_fields.h
>> @@ -28,7 +28,6 @@
>> */
>>
>> /* 16-bits */
>> -SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
>> SHADOW_FIELD_RW(GUEST_INTR_STATUS)
>> SHADOW_FIELD_RW(GUEST_PML_INDEX)
>> SHADOW_FIELD_RW(HOST_FS_SELECTOR)
>> @@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
>> SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
>> SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
>> SHADOW_FIELD_RW(TPR_THRESHOLD)
>> -SHADOW_FIELD_RW(GUEST_CS_LIMIT)
>> SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
>> +SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
>> SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
>> SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
>>
>> @@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
>> SHADOW_FIELD_RW(GUEST_CR3)
>> SHADOW_FIELD_RW(GUEST_CR4)
>> SHADOW_FIELD_RW(GUEST_RFLAGS)
>> -SHADOW_FIELD_RW(GUEST_CS_BASE)
>> -SHADOW_FIELD_RW(GUEST_ES_BASE)
>> SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
>> SHADOW_FIELD_RW(CR0_READ_SHADOW)
>> SHADOW_FIELD_RW(CR4_READ_SHADOW)
>>
>

2018-11-12 14:40:26

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm/nVMX: tweak shadow fields

Jim Mattson <[email protected]> writes:

> I'm not convinced that the "one size fits all" and "context-free"
> approaches to VMCS shadowing are terribly effective.
>
> For example, we never shadow VMX_INSTRUCTION_INFO, but if we just
> reflected an exit to L1 for which that field is defined, there's
> probably a good chance that L1 will use it. We always shadow
> VM_EXIT_INTR_INFO, but if we didn't just reflect exit reason 0 to L1,
> it's not likely to be read. If the L2 guest is in legacy mode or
> compatibility mode, L1 is much more likely to be interested in the
> contents of the descriptor cache than if the guest is in 64-bit mode.
>
> Some hypervisors write TSC_OFFSET quite frequently. Others rarely.
> Last time I checked (it's been a while), VirtualBox was always
> interested in everything. :-) Kvm, Hyper-V, VMware, VirtualBox,
> Parallels...they all have different patterns, and they change from
> release to release.
>
> Is it worth having a set of VMCS shadowing bitmaps per-vCPU, in order
> to make better use of this feature?

Per CPU or not, to improve the feature we'll probably need some sort of
an 'adaptive' algorithm picking which fields to shadow. I haven't
thought this through, especially read/write shadowing, but we can
probably start with an empty bitmap and later shadow it when we get over
some threshold of vmread/vmwrite exits we enabling shadowing. The
question is when we un-shadow it. For example, we can un-shadow a field
for writing every time we see it was not changed between two exits to L0
(so we're trying to write the same value to vmcs12).

--
Vitaly

2018-11-14 11:35:39

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm/nVMX: tweak shadow fields

On 12/11/2018 15:39, Vitaly Kuznetsov wrote:
>> Is it worth having a set of VMCS shadowing bitmaps per-vCPU, in order
>> to make better use of this feature?
> Per CPU or not, to improve the feature we'll probably need some sort of
> an 'adaptive' algorithm picking which fields to shadow.

I agree, making it per-VCPU is not useful alone. The question is how to
balance the complexity against the number of fields that have to be
copied between the VMCSes.

If a vmexit type is rare, it makes sense not to shadow a field that
would always be defined by that vmexit type, rather than pay a fixed
price (even if it is loop overhead only) on all vmexits; this is the
case for VMX_INSTRUCTION_INFO.

One thing that would make sense is to have separate shadow bitmaps for
32- and 64-bit L2. 32-bit L2 probably will need to shadow at least the
segment bases. But for 64-bit L2, the current set is small and nice.

There are still a few things that can be refined, but it's small things:

1) EXCEPTION_BITMAP which can go because everyone is probably using
eager FPU these days---and has always been if you have shadow VMCS;

2) CR0_READ_SHADOW/CR4_READ_SHADOW/GUEST_CR0/GUEST_CR4 were needed on
old KVM and would need to be tested on other hypervisors, but are
probably unnecessary;

3) I would be surprised if HOST_FS_BASE/HOST_GS_BASE are needed too,
though again you'd need testing on other hypervisors

Overall, I prefer simple code that optimizes the common case very well,
rather than complex code that tries to cover all bases...

Paolo