2023-09-15 05:27:26

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH 0/2] SEV-ES TSC_AUX virtualization fix and optimization

This patch series provides a fix to the TSC_AUX virtualization support
and an optimization to reduce the number of WRMSRs to TSC_AUX when
it is virtualized.

---

Patches based on https://git.kernel.org/pub/scm/virt/kvm/kvm.git master
and commit:
7c7cce2cf7ee ("Merge tag 'kvmarm-fixes-6.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD")

Tom Lendacky (2):
KVM: SVM: Fix TSC_AUX virtualization setup
KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX

arch/x86/kvm/svm/sev.c | 41 ++++++++++++++++++++++++++++++++---------
arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++----------
arch/x86/kvm/svm/svm.h | 5 ++++-
3 files changed, 55 insertions(+), 20 deletions(-)

--
2.41.0


2023-09-15 05:44:21

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH 2/2] KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX

When the TSC_AUX MSR is virtualized, the TSC_AUX value is swap type "B"
within the VMSA. This means that the guest value is loaded on VMRUN and
the host value is restored from the host save area on #VMEXIT.

Since the value is restored on #VMEXIT, the KVM user return MSR support
for TSC_AUX can be replaced by populating the host save area with current
host value of TSC_AUX. This replaces two WRMSR instructions with a single
RDMSR instruction.

Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/kvm/svm/sev.c | 14 +++++++++++++-
arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++----------
arch/x86/kvm/svm/svm.h | 4 +++-
3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 565c9de87c6d..1bbaae2fed96 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2969,6 +2969,7 @@ static void sev_es_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
(guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(vcpu, X86_FEATURE_RDPID))) {
+ svm->v_tsc_aux = true;
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
@@ -3071,8 +3072,10 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
sev_enc_bit));
}

-void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
{
+ u32 msr_hi;
+
/*
* All host state for SEV-ES guests is categorized into three swap types
* based on how it is handled by hardware during a world switch:
@@ -3109,6 +3112,15 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
}
+
+ /*
+ * If TSC_AUX virtualization is enabled, MSR_TSC_AUX is loaded but NOT
+ * saved by the CPU (Type-B). If TSC_AUX is not virtualized, the user
+ * return MSR support takes care of restoring MSR_TSC_AUX. This
+ * exchanges two WRMSRs for one RDMSR.
+ */
+ if (svm->v_tsc_aux)
+ rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c58d5632e74a..905b1a2664ed 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1529,13 +1529,13 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
struct sev_es_save_area *hostsa;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);

- sev_es_prepare_switch_to_guest(hostsa);
+ sev_es_prepare_switch_to_guest(svm, hostsa);
}

if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);

- if (likely(tsc_aux_uret_slot >= 0))
+ if (likely(tsc_aux_uret_slot >= 0) && !svm->v_tsc_aux)
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);

svm->guest_state_loaded = true;
@@ -3090,15 +3090,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_TSC_AUX:
/*
- * TSC_AUX is usually changed only during boot and never read
- * directly. Intercept TSC_AUX instead of exposing it to the
- * guest via direct_access_msrs, and switch it via user return.
+ * If TSC_AUX is being virtualized, do not use the user return
+ * MSR support because TSC_AUX is restored on #VMEXIT.
*/
- preempt_disable();
- ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
- preempt_enable();
- if (ret)
- break;
+ if (!svm->v_tsc_aux) {
+ /*
+ * TSC_AUX is usually changed only during boot and never read
+ * directly. Intercept TSC_AUX instead of exposing it to the
+ * guest via direct_access_msrs, and switch it via user return.
+ */
+ preempt_disable();
+ ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
+ preempt_enable();
+ if (ret)
+ break;
+ }

svm->tsc_aux = data;
break;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c0d17da46fae..49427858474e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -213,6 +213,8 @@ struct vcpu_svm {
u32 asid;
u32 sysenter_esp_hi;
u32 sysenter_eip_hi;
+
+ bool v_tsc_aux;
uint64_t tsc_aux;

u64 msr_decfg;
@@ -690,7 +692,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
+void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);

/* vmenter.S */
--
2.41.0

2023-09-15 06:39:39

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH 1/2] KVM: SVM: Fix TSC_AUX virtualization setup

The checks for virtualizing TSC_AUX occur during the vCPU reset processing
path. However, at the time of initial vCPU reset processing, when the vCPU
is first created, not all of the guest CPUID information has been set. In
this case the RDTSCP and RDPID feature support for the guest is not in
place and so TSC_AUX virtualization is not established.

This continues for each vCPU created for the guest. On the first boot of
an AP, vCPU reset processing is executed as a result of an APIC INIT
event, this time with all of the guest CPUID information set, resulting
in TSC_AUX virtualization being enabled, but only for the APs. The BSP
always sees a TSC_AUX value of 0 which probably went unnoticed because,
at least for Linux, the BSP TSC_AUX value is 0.

Move the TSC_AUX virtualization enablement into the vcpu_after_set_cpuid()
path to allow for proper initialization of the support after the guest
CPUID information has been set.

Fixes: 296d5a17e793 ("KVM: SEV-ES: Use V_TSC_AUX if available instead of RDTSC/MSR_TSC_AUX intercepts")
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/kvm/svm/sev.c | 27 +++++++++++++++++++--------
arch/x86/kvm/svm/svm.c | 3 +++
arch/x86/kvm/svm/svm.h | 1 +
3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index b9a0a939d59f..565c9de87c6d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2962,6 +2962,25 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
count, in);
}

+static void sev_es_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
+ (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpuid_has(vcpu, X86_FEATURE_RDPID))) {
+ set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
+ if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+ }
+}
+
+void sev_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
+{
+ if (sev_es_guest(svm->vcpu.kvm))
+ sev_es_init_vmcb_after_set_cpuid(svm);
+}
+
static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -3024,14 +3043,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
-
- if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
- (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
- guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
- set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
- if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
- svm_clr_intercept(svm, INTERCEPT_RDTSCP);
- }
}

void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f283eb47f6ac..c58d5632e74a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1225,6 +1225,9 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
}
+
+ if (sev_guest(vcpu->kvm))
+ sev_init_vmcb_after_set_cpuid(svm);
}

static void init_vmcb(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f41253958357..c0d17da46fae 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -684,6 +684,7 @@ void __init sev_hardware_setup(void);
void sev_hardware_unsetup(void);
int sev_cpu_init(struct svm_cpu_data *sd);
void sev_init_vmcb(struct vcpu_svm *svm);
+void sev_init_vmcb_after_set_cpuid(struct vcpu_svm *svm);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
--
2.41.0

2023-09-15 17:21:22

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 2/2] KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX

On Fri, Sep 15, 2023, Sean Christopherson wrote:
> On Thu, Sep 14, 2023, Tom Lendacky wrote:
> > When the TSC_AUX MSR is virtualized, the TSC_AUX value is swap type "B"
> > within the VMSA. This means that the guest value is loaded on VMRUN and
> > the host value is restored from the host save area on #VMEXIT.
> >
> > Since the value is restored on #VMEXIT, the KVM user return MSR support
> > for TSC_AUX can be replaced by populating the host save area with current
> > host value of TSC_AUX. This replaces two WRMSR instructions with a single
> > RDMSR instruction.
> >
> > Signed-off-by: Tom Lendacky <[email protected]>
> > ---
> > arch/x86/kvm/svm/sev.c | 14 +++++++++++++-
> > arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++----------
> > arch/x86/kvm/svm/svm.h | 4 +++-
> > 3 files changed, 32 insertions(+), 12 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > index 565c9de87c6d..1bbaae2fed96 100644
> > --- a/arch/x86/kvm/svm/sev.c
> > +++ b/arch/x86/kvm/svm/sev.c
> > @@ -2969,6 +2969,7 @@ static void sev_es_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
> > if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
> > (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
> > guest_cpuid_has(vcpu, X86_FEATURE_RDPID))) {
> > + svm->v_tsc_aux = true;
> > set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
> > if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > svm_clr_intercept(svm, INTERCEPT_RDTSCP);
> > @@ -3071,8 +3072,10 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
> > sev_enc_bit));
> > }
> >
> > -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
> > +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
> > {
> > + u32 msr_hi;
> > +
> > /*
> > * All host state for SEV-ES guests is categorized into three swap types
> > * based on how it is handled by hardware during a world switch:
> > @@ -3109,6 +3112,15 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
> > hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
> > hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
> > }
> > +
> > + /*
> > + * If TSC_AUX virtualization is enabled, MSR_TSC_AUX is loaded but NOT
> > + * saved by the CPU (Type-B). If TSC_AUX is not virtualized, the user
> > + * return MSR support takes care of restoring MSR_TSC_AUX. This
> > + * exchanges two WRMSRs for one RDMSR.
> > + */
> > + if (svm->v_tsc_aux)
> > + rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);
>
> IIUC, when V_TSC_AUX is supported, SEV-ES guests context switch MSR_TSC_AUX
> regardless of what has been exposed to the guest. So rather than condition the
> hostsa->tsc_aux update on guest CPUID, just do it if V_TSC_AUX is supported.
>
> And then to avoid the RDMSR, which is presumably the motivation for checking
> guest CPUID, grab the host value from user return framework. The host values
> are per-CPU, but constant after boot, so the only requirement is that KVM sets
> up MSR_TSC_AUX in the user return framework.

Actually, duh. The save area is also per-CPU, so just fill hostsa->tsc_aux in
svm_hardware_setup() and then sev_es_prepare_switch_to_guest() never has to do
anything.

2023-09-15 17:34:52

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: SVM: Fix TSC_AUX virtualization setup

On Fri, Sep 15, 2023, Tom Lendacky wrote:
> On 9/14/23 15:48, Tom Lendacky wrote:
> > On 9/14/23 15:28, Sean Christopherson wrote:
> > > On Thu, Sep 14, 2023, Tom Lendacky wrote:
>
> >
> > >
> > > > +        if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > > > +            svm_clr_intercept(svm, INTERCEPT_RDTSCP);
> > >
> > > Same thing here.
> >
> > Will do.
>
> For RDTSCP, svm_recalc_instruction_intercepts() will set/clear the RDTSCP
> intercept as part of the svm_vcpu_set_after_cpuid() path, but it will only
> do it based on kvm_cpu_cap_has(X86_FEATURE_RDTSCP) being true, which is very
> likely.
>
> Do you think that is good enough and we can drop the setting and clearing of
> the RDTSCP intercept in the sev_es_vcpu_set_after_cpuid() function and only
> deal with the TSC_AUX MSR intercept?

The common handling should be good enough.

> On a side note, it looks like RDTSCP would not be intercepted if the KVM cap
> X86_FEATURE_RDTSCP feature is cleared, however unlikely, in
> kvm_set_cpu_caps() and RDTSCP is not advertised to the guest (assuming the
> guest is ignoring the RDTSCP CPUID bit).

Hmm, yes, though the only scenario in which KVM clears RDTSCP on AMD comes with
a WARN (it's a guard against KVM bugs). If the guest ignores CPUID and uses
RDTSCP anyways, the guest deserves its death, and leaking the host pCPU doesn't
seem like a major issue.

That said, if hardware behavior is to ignore unknown intercepts, e.g. if KVM can
safely set INTERCEPT_RDTSCP even when hardware doesn't support said intercept,
then I wouldn't be opposed to doing:

/*
* Intercept INVPCID if shadow paging is enabled to sync/free shadow
* roots, or if INVPCID is disabled in the guest to inject #UD.
*/
if (!kvm_cpu_cap_has(X86_FEATURE_INVPCID) ||
!npt_enabled || !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID);
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);

if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
else
svm_set_intercept(svm, INTERCEPT_RDTSCP);

Alternatively, KVM could check boot_cpu_has() instead or kvm_cpu_cap_has(), but
that's not foolproof either, e.g. see Intel's of hiding PCID to workaround the
TLB flushing bug on Alderlake. So my vote would either be to keep things as-is,
or do the above (if that's safe).

2023-09-15 19:02:07

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 2/2] KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX

On Thu, Sep 14, 2023, Tom Lendacky wrote:
> When the TSC_AUX MSR is virtualized, the TSC_AUX value is swap type "B"
> within the VMSA. This means that the guest value is loaded on VMRUN and
> the host value is restored from the host save area on #VMEXIT.
>
> Since the value is restored on #VMEXIT, the KVM user return MSR support
> for TSC_AUX can be replaced by populating the host save area with current
> host value of TSC_AUX. This replaces two WRMSR instructions with a single
> RDMSR instruction.
>
> Signed-off-by: Tom Lendacky <[email protected]>
> ---
> arch/x86/kvm/svm/sev.c | 14 +++++++++++++-
> arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++----------
> arch/x86/kvm/svm/svm.h | 4 +++-
> 3 files changed, 32 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 565c9de87c6d..1bbaae2fed96 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2969,6 +2969,7 @@ static void sev_es_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
> if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
> (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
> guest_cpuid_has(vcpu, X86_FEATURE_RDPID))) {
> + svm->v_tsc_aux = true;
> set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
> if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
> @@ -3071,8 +3072,10 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
> sev_enc_bit));
> }
>
> -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
> +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
> {
> + u32 msr_hi;
> +
> /*
> * All host state for SEV-ES guests is categorized into three swap types
> * based on how it is handled by hardware during a world switch:
> @@ -3109,6 +3112,15 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
> hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
> hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
> }
> +
> + /*
> + * If TSC_AUX virtualization is enabled, MSR_TSC_AUX is loaded but NOT
> + * saved by the CPU (Type-B). If TSC_AUX is not virtualized, the user
> + * return MSR support takes care of restoring MSR_TSC_AUX. This
> + * exchanges two WRMSRs for one RDMSR.
> + */
> + if (svm->v_tsc_aux)
> + rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);

IIUC, when V_TSC_AUX is supported, SEV-ES guests context switch MSR_TSC_AUX
regardless of what has been exposed to the guest. So rather than condition the
hostsa->tsc_aux update on guest CPUID, just do it if V_TSC_AUX is supported.

And then to avoid the RDMSR, which is presumably the motivation for checking
guest CPUID, grab the host value from user return framework. The host values
are per-CPU, but constant after boot, so the only requirement is that KVM sets
up MSR_TSC_AUX in the user return framework.

> }
>
> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index c58d5632e74a..905b1a2664ed 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1529,13 +1529,13 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
> struct sev_es_save_area *hostsa;
> hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
>
> - sev_es_prepare_switch_to_guest(hostsa);
> + sev_es_prepare_switch_to_guest(svm, hostsa);
> }
>
> if (tsc_scaling)
> __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
>
> - if (likely(tsc_aux_uret_slot >= 0))
> + if (likely(tsc_aux_uret_slot >= 0) && !svm->v_tsc_aux)

And then this too becomes something like

if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))

> kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
>
> svm->guest_state_loaded = true;
> @@ -3090,15 +3090,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> break;
> case MSR_TSC_AUX:

And this also is simply:

if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
break;

Because svm->tsc_aux will never be consumed.

> /*
> - * TSC_AUX is usually changed only during boot and never read
> - * directly. Intercept TSC_AUX instead of exposing it to the
> - * guest via direct_access_msrs, and switch it via user return.
> + * If TSC_AUX is being virtualized, do not use the user return
> + * MSR support because TSC_AUX is restored on #VMEXIT.
> */
> - preempt_disable();
> - ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
> - preempt_enable();
> - if (ret)
> - break;
> + if (!svm->v_tsc_aux) {
> + /*
> + * TSC_AUX is usually changed only during boot and never read
> + * directly. Intercept TSC_AUX instead of exposing it to the
> + * guest via direct_access_msrs, and switch it via user return.
> + */
> + preempt_disable();
> + ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
> + preempt_enable();
> + if (ret)
> + break;
> + }
>
> svm->tsc_aux = data;
> break;

2023-09-15 21:10:18

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: SVM: Fix TSC_AUX virtualization setup

On 9/15/23 12:32, Sean Christopherson wrote:
> On Fri, Sep 15, 2023, Tom Lendacky wrote:
>> On 9/14/23 15:48, Tom Lendacky wrote:
>>> On 9/14/23 15:28, Sean Christopherson wrote:
>>>> On Thu, Sep 14, 2023, Tom Lendacky wrote:
>>
>>>
>>>>
>>>>> +        if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>>>>> +            svm_clr_intercept(svm, INTERCEPT_RDTSCP);
>>>>
>>>> Same thing here.
>>>
>>> Will do.
>>
>> For RDTSCP, svm_recalc_instruction_intercepts() will set/clear the RDTSCP
>> intercept as part of the svm_vcpu_set_after_cpuid() path, but it will only
>> do it based on kvm_cpu_cap_has(X86_FEATURE_RDTSCP) being true, which is very
>> likely.
>>
>> Do you think that is good enough and we can drop the setting and clearing of
>> the RDTSCP intercept in the sev_es_vcpu_set_after_cpuid() function and only
>> deal with the TSC_AUX MSR intercept?
>
> The common handling should be good enough.
>
>> On a side note, it looks like RDTSCP would not be intercepted if the KVM cap
>> X86_FEATURE_RDTSCP feature is cleared, however unlikely, in
>> kvm_set_cpu_caps() and RDTSCP is not advertised to the guest (assuming the
>> guest is ignoring the RDTSCP CPUID bit).
>
> Hmm, yes, though the only scenario in which KVM clears RDTSCP on AMD comes with
> a WARN (it's a guard against KVM bugs). If the guest ignores CPUID and uses
> RDTSCP anyways, the guest deserves its death, and leaking the host pCPU doesn't
> seem like a major issue.
>
> That said, if hardware behavior is to ignore unknown intercepts, e.g. if KVM can
> safely set INTERCEPT_RDTSCP even when hardware doesn't support said intercept,
> then I wouldn't be opposed to doing:
>
> /*
> * Intercept INVPCID if shadow paging is enabled to sync/free shadow
> * roots, or if INVPCID is disabled in the guest to inject #UD.
> */
> if (!kvm_cpu_cap_has(X86_FEATURE_INVPCID) ||
> !npt_enabled || !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
> svm_set_intercept(svm, INTERCEPT_INVPCID);
> else
> svm_clr_intercept(svm, INTERCEPT_INVPCID);
>
> if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
> guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
> else
> svm_set_intercept(svm, INTERCEPT_RDTSCP);
>
> Alternatively, KVM could check boot_cpu_has() instead or kvm_cpu_cap_has(), but
> that's not foolproof either, e.g. see Intel's of hiding PCID to workaround the
> TLB flushing bug on Alderlake. So my vote would either be to keep things as-is,
> or do the above (if that's safe).

Keep things as-is works for me :)

Thanks,
Tom

2023-09-15 22:15:16

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: SVM: Fix TSC_AUX virtualization setup

On Fri, Sep 15, 2023, Tom Lendacky wrote:
> On 9/14/23 16:13, Sean Christopherson wrote:
> This toggling possibility raises a question related to the second patch in
> this series that eliminates the use of the user return MSR for TSC_AUX.
> Depending on when the interfaces are called (set CPUID, host-initiated WRMSR
> of TSC_AUX, set CPUID again), I think we could end up in a state where the
> host TSC_AUX may not get restored properly, not 100% sure at the moment,
> though.

Give me a few minutes to respond to patch 2, I think it can be much simpler, more
performant, and avoid any races.

> Let me drop that patch from the series for now and just send the fix(es).
> I'll work through the other scenarios and code paths and send the user
> return MSR optimization as a separate series later.

2023-09-16 03:24:48

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH 2/2] KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX

On 9/15/23 09:51, Sean Christopherson wrote:
> On Fri, Sep 15, 2023, Sean Christopherson wrote:
>> On Thu, Sep 14, 2023, Tom Lendacky wrote:
>>> When the TSC_AUX MSR is virtualized, the TSC_AUX value is swap type "B"
>>> within the VMSA. This means that the guest value is loaded on VMRUN and
>>> the host value is restored from the host save area on #VMEXIT.
>>>
>>> Since the value is restored on #VMEXIT, the KVM user return MSR support
>>> for TSC_AUX can be replaced by populating the host save area with current
>>> host value of TSC_AUX. This replaces two WRMSR instructions with a single
>>> RDMSR instruction.
>>>
>>> Signed-off-by: Tom Lendacky <[email protected]>
>>> ---
>>> arch/x86/kvm/svm/sev.c | 14 +++++++++++++-
>>> arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++----------
>>> arch/x86/kvm/svm/svm.h | 4 +++-
>>> 3 files changed, 32 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>>> index 565c9de87c6d..1bbaae2fed96 100644
>>> --- a/arch/x86/kvm/svm/sev.c
>>> +++ b/arch/x86/kvm/svm/sev.c
>>> @@ -2969,6 +2969,7 @@ static void sev_es_init_vmcb_after_set_cpuid(struct vcpu_svm *svm)
>>> if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
>>> (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
>>> guest_cpuid_has(vcpu, X86_FEATURE_RDPID))) {
>>> + svm->v_tsc_aux = true;
>>> set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
>>> if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>>> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
>>> @@ -3071,8 +3072,10 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
>>> sev_enc_bit));
>>> }
>>>
>>> -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
>>> +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
>>> {
>>> + u32 msr_hi;
>>> +
>>> /*
>>> * All host state for SEV-ES guests is categorized into three swap types
>>> * based on how it is handled by hardware during a world switch:
>>> @@ -3109,6 +3112,15 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
>>> hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
>>> hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
>>> }
>>> +
>>> + /*
>>> + * If TSC_AUX virtualization is enabled, MSR_TSC_AUX is loaded but NOT
>>> + * saved by the CPU (Type-B). If TSC_AUX is not virtualized, the user
>>> + * return MSR support takes care of restoring MSR_TSC_AUX. This
>>> + * exchanges two WRMSRs for one RDMSR.
>>> + */
>>> + if (svm->v_tsc_aux)
>>> + rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);
>>
>> IIUC, when V_TSC_AUX is supported, SEV-ES guests context switch MSR_TSC_AUX
>> regardless of what has been exposed to the guest. So rather than condition the
>> hostsa->tsc_aux update on guest CPUID, just do it if V_TSC_AUX is supported.
>>
>> And then to avoid the RDMSR, which is presumably the motivation for checking
>> guest CPUID, grab the host value from user return framework. The host values
>> are per-CPU, but constant after boot, so the only requirement is that KVM sets
>> up MSR_TSC_AUX in the user return framework.
>
> Actually, duh. The save area is also per-CPU, so just fill hostsa->tsc_aux in
> svm_hardware_setup() and then sev_es_prepare_switch_to_guest() never has to do
> anything.

Ah, right, because Linux never changes TSC_AUX post boot. Much simpler.

I'll rework based on the comments and send a v2 series.

Thanks,
Tom