The scaling information in sub-leaf 1 should match the values in the
'vcpu_info' sub-structure 'time_info' (a.k.a. pvclock_vcpu_time_info) which
is shared with the guest. The offset values are not set since a TSC offset
is already applied.
The host TSC frequency should also be set in sub-leaf 2.
This patch adds a new kvm_xen_set_cpuid() function that scans for the
relevant CPUID leaf when the CPUID information is updated by the VMM and
stashes pointers to the sub-leaves in the kvm_vcpu_xen structure.
The values are then updated by a call to the, also new,
kvm_xen_setup_tsc_info() function made at the end of
kvm_guest_time_update() just before entering the guest.
Signed-off-by: Paul Durrant <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/cpuid.c | 2 ++
arch/x86/kvm/x86.c | 1 +
arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++++
arch/x86/kvm/xen.h | 10 ++++++++
5 files changed, 56 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1038ccb7056a..f77a4940542f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -638,6 +638,8 @@ struct kvm_vcpu_xen {
struct hrtimer timer;
int poll_evtchn;
struct timer_list poll_timer;
+ struct kvm_cpuid_entry2 *tsc_info_1;
+ struct kvm_cpuid_entry2 *tsc_info_2;
};
struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d47222ab8e6e..eb6cd88c974a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -25,6 +25,7 @@
#include "mmu.h"
#include "trace.h"
#include "pmu.h"
+#include "xen.h"
/*
* Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
@@ -310,6 +311,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
__cr4_reserved_bits(guest_cpuid_has, vcpu);
kvm_hv_set_cpuid(vcpu);
+ kvm_xen_set_cpuid(vcpu);
/* Invoke the vendor callback only after the above state is updated. */
static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00e23dc518e0..8b45f9975e45 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (vcpu->xen.vcpu_time_info_cache.active)
kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
+ kvm_xen_setup_tsc_info(v);
return 0;
}
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 610beba35907..a016ff85264d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -10,6 +10,9 @@
#include "xen.h"
#include "hyperv.h"
#include "lapic.h"
+#include "cpuid.h"
+
+#include <asm/xen/cpuid.h>
#include <linux/eventfd.h>
#include <linux/kvm_host.h>
@@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
if (kvm->arch.xen_hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
}
+
+void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 base = 0;
+ u32 function;
+
+ for_each_possible_hypervisor_cpuid_base(function) {
+ struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
+
+ if (entry &&
+ entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
+ entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
+ entry->edx == XEN_CPUID_SIGNATURE_EDX) {
+ base = function;
+ break;
+ }
+ }
+ if (!base)
+ return;
+
+ function = base | XEN_CPUID_LEAF(3);
+ vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
+ vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
+}
+
+void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry = vcpu->arch.xen.tsc_info_1;
+
+ if (entry) {
+ entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
+ entry->edx = vcpu->arch.hv_clock.tsc_shift;
+ }
+
+ entry = vcpu->arch.xen.tsc_info_2;
+ if (entry)
+ entry->eax = vcpu->arch.hw_tsc_khz;
+}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 532a535a9e99..1afb663318a9 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -32,6 +32,8 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
+void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu);
+void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu);
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
@@ -135,6 +137,14 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
{
return false;
}
+
+static inline void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
+{
+}
#endif
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
--
2.20.1
Paul Durrant <[email protected]> writes:
> The scaling information in sub-leaf 1 should match the values in the
> 'vcpu_info' sub-structure 'time_info' (a.k.a. pvclock_vcpu_time_info) which
> is shared with the guest. The offset values are not set since a TSC offset
> is already applied.
> The host TSC frequency should also be set in sub-leaf 2.
>
> This patch adds a new kvm_xen_set_cpuid() function that scans for the
> relevant CPUID leaf when the CPUID information is updated by the VMM and
> stashes pointers to the sub-leaves in the kvm_vcpu_xen structure.
> The values are then updated by a call to the, also new,
> kvm_xen_setup_tsc_info() function made at the end of
> kvm_guest_time_update() just before entering the guest.
>
> Signed-off-by: Paul Durrant <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/cpuid.c | 2 ++
> arch/x86/kvm/x86.c | 1 +
> arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++++
> arch/x86/kvm/xen.h | 10 ++++++++
> 5 files changed, 56 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1038ccb7056a..f77a4940542f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -638,6 +638,8 @@ struct kvm_vcpu_xen {
> struct hrtimer timer;
> int poll_evtchn;
> struct timer_list poll_timer;
> + struct kvm_cpuid_entry2 *tsc_info_1;
> + struct kvm_cpuid_entry2 *tsc_info_2;
> };
>
> struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index d47222ab8e6e..eb6cd88c974a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -25,6 +25,7 @@
> #include "mmu.h"
> #include "trace.h"
> #include "pmu.h"
> +#include "xen.h"
>
> /*
> * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> @@ -310,6 +311,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> __cr4_reserved_bits(guest_cpuid_has, vcpu);
>
> kvm_hv_set_cpuid(vcpu);
> + kvm_xen_set_cpuid(vcpu);
>
> /* Invoke the vendor callback only after the above state is updated. */
> static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 00e23dc518e0..8b45f9975e45 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> if (vcpu->xen.vcpu_time_info_cache.active)
> kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> + kvm_xen_setup_tsc_info(v);
> return 0;
> }
>
> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index 610beba35907..a016ff85264d 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -10,6 +10,9 @@
> #include "xen.h"
> #include "hyperv.h"
> #include "lapic.h"
> +#include "cpuid.h"
> +
> +#include <asm/xen/cpuid.h>
>
> #include <linux/eventfd.h>
> #include <linux/kvm_host.h>
> @@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
> if (kvm->arch.xen_hvm_config.msr)
> static_branch_slow_dec_deferred(&kvm_xen_enabled);
> }
> +
> +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
> +{
> + u32 base = 0;
> + u32 function;
> +
> + for_each_possible_hypervisor_cpuid_base(function) {
> + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> +
> + if (entry &&
> + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> + base = function;
> + break;
> + }
> + }
> + if (!base)
> + return;
> +
> + function = base | XEN_CPUID_LEAF(3);
> + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
> +}
Imagine the following scenario: CPUID data was supplied with Xen CPUID
leaves first and then got updated with new information which doesn't
have Xen CPUID info (e.g. has Hyper-V signature instead of Xen in the
same 0x40000000 leaf). Won't arch.xen.tsc_info_1/arch.xen.tsc_info_2
pointers become dangling here after we free the old CPUID data ...
> +
> +void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *entry = vcpu->arch.xen.tsc_info_1;
> +
> + if (entry) {
> + entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
> + entry->edx = vcpu->arch.hv_clock.tsc_shift;
... just to crash everything here?
> + }
> +
> + entry = vcpu->arch.xen.tsc_info_2;
> + if (entry)
> + entry->eax = vcpu->arch.hw_tsc_khz;
> +}
> diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
> index 532a535a9e99..1afb663318a9 100644
> --- a/arch/x86/kvm/xen.h
> +++ b/arch/x86/kvm/xen.h
> @@ -32,6 +32,8 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
> int kvm_xen_setup_evtchn(struct kvm *kvm,
> struct kvm_kernel_irq_routing_entry *e,
> const struct kvm_irq_routing_entry *ue);
> +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu);
> +void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu);
>
> static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
> {
> @@ -135,6 +137,14 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
> {
> return false;
> }
> +
> +static inline void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> +static inline void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
> +{
> +}
> #endif
>
> int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
--
Vitaly
> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: 22 June 2022 10:40
> To: Durrant, Paul <[email protected]>
> Cc: Paolo Bonzini <[email protected]>; Sean Christopherson <[email protected]>; Wanpeng Li
> <[email protected]>; Jim Mattson <[email protected]>; Joerg Roedel <[email protected]>; Thomas
> Gleixner <[email protected]>; Ingo Molnar <[email protected]>; Borislav Petkov <[email protected]>; Dave
> Hansen <[email protected]>; H. Peter Anvin <[email protected]>; [email protected];
> [email protected]; [email protected]
> Subject: RE: [EXTERNAL][PATCH] KVM: x86/xen: Update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present
>
> CAUTION: This email originated from outside of the organization. Do not click links or open
> attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Paul Durrant <[email protected]> writes:
>
> > The scaling information in sub-leaf 1 should match the values in the
> > 'vcpu_info' sub-structure 'time_info' (a.k.a. pvclock_vcpu_time_info) which
> > is shared with the guest. The offset values are not set since a TSC offset
> > is already applied.
> > The host TSC frequency should also be set in sub-leaf 2.
> >
> > This patch adds a new kvm_xen_set_cpuid() function that scans for the
> > relevant CPUID leaf when the CPUID information is updated by the VMM and
> > stashes pointers to the sub-leaves in the kvm_vcpu_xen structure.
> > The values are then updated by a call to the, also new,
> > kvm_xen_setup_tsc_info() function made at the end of
> > kvm_guest_time_update() just before entering the guest.
> >
> > Signed-off-by: Paul Durrant <[email protected]>
> > ---
> > arch/x86/include/asm/kvm_host.h | 2 ++
> > arch/x86/kvm/cpuid.c | 2 ++
> > arch/x86/kvm/x86.c | 1 +
> > arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++++
> > arch/x86/kvm/xen.h | 10 ++++++++
> > 5 files changed, 56 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 1038ccb7056a..f77a4940542f 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -638,6 +638,8 @@ struct kvm_vcpu_xen {
> > struct hrtimer timer;
> > int poll_evtchn;
> > struct timer_list poll_timer;
> > + struct kvm_cpuid_entry2 *tsc_info_1;
> > + struct kvm_cpuid_entry2 *tsc_info_2;
> > };
> >
> > struct kvm_vcpu_arch {
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index d47222ab8e6e..eb6cd88c974a 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -25,6 +25,7 @@
> > #include "mmu.h"
> > #include "trace.h"
> > #include "pmu.h"
> > +#include "xen.h"
> >
> > /*
> > * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> > @@ -310,6 +311,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > __cr4_reserved_bits(guest_cpuid_has, vcpu);
> >
> > kvm_hv_set_cpuid(vcpu);
> > + kvm_xen_set_cpuid(vcpu);
> >
> > /* Invoke the vendor callback only after the above state is updated. */
> > static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 00e23dc518e0..8b45f9975e45 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > if (vcpu->xen.vcpu_time_info_cache.active)
> > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> > + kvm_xen_setup_tsc_info(v);
> > return 0;
> > }
> >
> > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> > index 610beba35907..a016ff85264d 100644
> > --- a/arch/x86/kvm/xen.c
> > +++ b/arch/x86/kvm/xen.c
> > @@ -10,6 +10,9 @@
> > #include "xen.h"
> > #include "hyperv.h"
> > #include "lapic.h"
> > +#include "cpuid.h"
> > +
> > +#include <asm/xen/cpuid.h>
> >
> > #include <linux/eventfd.h>
> > #include <linux/kvm_host.h>
> > @@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
> > if (kvm->arch.xen_hvm_config.msr)
> > static_branch_slow_dec_deferred(&kvm_xen_enabled);
> > }
> > +
> > +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
> > +{
> > + u32 base = 0;
> > + u32 function;
> > +
> > + for_each_possible_hypervisor_cpuid_base(function) {
> > + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> > +
> > + if (entry &&
> > + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> > + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> > + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> > + base = function;
> > + break;
> > + }
> > + }
> > + if (!base)
> > + return;
> > +
> > + function = base | XEN_CPUID_LEAF(3);
> > + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> > + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
> > +}
>
> Imagine the following scenario: CPUID data was supplied with Xen CPUID
> leaves first and then got updated with new information which doesn't
> have Xen CPUID info (e.g. has Hyper-V signature instead of Xen in the
> same 0x40000000 leaf). Won't arch.xen.tsc_info_1/arch.xen.tsc_info_2
> pointers become dangling here after we free the old CPUID data ...
>
> > +
> > +void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
> > +{
> > + struct kvm_cpuid_entry2 *entry = vcpu->arch.xen.tsc_info_1;
> > +
> > + if (entry) {
> > + entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
> > + entry->edx = vcpu->arch.hv_clock.tsc_shift;
>
> ... just to crash everything here?
Yes, you are indeed correct. I'd never considered the leaves being removed from the set but the code should cope with this. V2 shortly.
Paul
>
> > + }
> > +
> > + entry = vcpu->arch.xen.tsc_info_2;
> > + if (entry)
> > + entry->eax = vcpu->arch.hw_tsc_khz;
> > +}
> > diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
> > index 532a535a9e99..1afb663318a9 100644
> > --- a/arch/x86/kvm/xen.h
> > +++ b/arch/x86/kvm/xen.h
> > @@ -32,6 +32,8 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
> > int kvm_xen_setup_evtchn(struct kvm *kvm,
> > struct kvm_kernel_irq_routing_entry *e,
> > const struct kvm_irq_routing_entry *ue);
> > +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu);
> > +void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu);
> >
> > static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
> > {
> > @@ -135,6 +137,14 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
> > {
> > return false;
> > }
> > +
> > +static inline void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > +
> > +static inline void kvm_xen_setup_tsc_info(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > #endif
> >
> > int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
>
> --
> Vitaly
On Wed, Jun 22, 2022, Paul Durrant wrote:
> The scaling information in sub-leaf 1 should match the values in the
> 'vcpu_info' sub-structure 'time_info' (a.k.a. pvclock_vcpu_time_info) which
> is shared with the guest. The offset values are not set since a TSC offset
> is already applied.
> The host TSC frequency should also be set in sub-leaf 2.
Explain why this is KVM's problem, i.e. why userspace is unable to set the correct
values.
> This patch adds a new kvm_xen_set_cpuid() function that scans for the
Please avoid "This patch".
> relevant CPUID leaf when the CPUID information is updated by the VMM and
> stashes pointers to the sub-leaves in the kvm_vcpu_xen structure.
> The values are then updated by a call to the, also new,
> kvm_xen_setup_tsc_info() function made at the end of
> kvm_guest_time_update() just before entering the guest.
This is not a helpful paragraph, it provides zero information that isn't obvious
from the code.
The changelog should read something like:
Update Xen CPUID leaves that expose TSC frequency and scaling information
to the guest <blah blah blah>. Cache the leaves <blah blah blah>.
> Signed-off-by: Paul Durrant <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/cpuid.c | 2 ++
> arch/x86/kvm/x86.c | 1 +
> arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++++
> arch/x86/kvm/xen.h | 10 ++++++++
> 5 files changed, 56 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1038ccb7056a..f77a4940542f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -638,6 +638,8 @@ struct kvm_vcpu_xen {
> struct hrtimer timer;
> int poll_evtchn;
> struct timer_list poll_timer;
> + struct kvm_cpuid_entry2 *tsc_info_1;
> + struct kvm_cpuid_entry2 *tsc_info_2;
> };
>
> struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index d47222ab8e6e..eb6cd88c974a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -25,6 +25,7 @@
> #include "mmu.h"
> #include "trace.h"
> #include "pmu.h"
> +#include "xen.h"
>
> /*
> * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> @@ -310,6 +311,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> __cr4_reserved_bits(guest_cpuid_has, vcpu);
>
> kvm_hv_set_cpuid(vcpu);
> + kvm_xen_set_cpuid(vcpu);
>
> /* Invoke the vendor callback only after the above state is updated. */
> static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 00e23dc518e0..8b45f9975e45 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> if (vcpu->xen.vcpu_time_info_cache.active)
> kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> + kvm_xen_setup_tsc_info(v);
This can be called inside this if statement, no?
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
}
> return 0;
> }
>
> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index 610beba35907..a016ff85264d 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -10,6 +10,9 @@
> #include "xen.h"
> #include "hyperv.h"
> #include "lapic.h"
> +#include "cpuid.h"
> +
> +#include <asm/xen/cpuid.h>
>
> #include <linux/eventfd.h>
> #include <linux/kvm_host.h>
> @@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
> if (kvm->arch.xen_hvm_config.msr)
> static_branch_slow_dec_deferred(&kvm_xen_enabled);
> }
> +
> +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
This is a very, very misleading name. It does not "set" anything. Given that
this patch adds "set" and "setup", I expected the "set" to, you know, set the CPUID
leaves and the "setup" to prepare for that, not the other way around.
If the leaves really do need to be cached, kvm_xen_after_set_cpuid() is probably
the least awful name.
> +{
> + u32 base = 0;
> + u32 function;
> +
> + for_each_possible_hypervisor_cpuid_base(function) {
> + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> +
> + if (entry &&
> + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> + base = function;
> + break;
> + }
> + }
> + if (!base)
> + return;
> +
> + function = base | XEN_CPUID_LEAF(3);
> + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
Is it really necessary to cache the leaves? Guest CPUID isn't optimized, but it's
not _that_ slow, and unless I'm missing something updating the TSC frequency and
scaling info should be uncommon, i.e. not performance critical.
> -----Original Message-----
> From: Sean Christopherson <[email protected]>
> Sent: 22 June 2022 15:44
> To: Durrant, Paul <[email protected]>
> Cc: [email protected]; [email protected]; [email protected]; Paolo Bonzini
> <[email protected]>; Vitaly Kuznetsov <[email protected]>; Wanpeng Li <[email protected]>; Jim
> Mattson <[email protected]>; Joerg Roedel <[email protected]>; Thomas Gleixner <[email protected]>;
> Ingo Molnar <[email protected]>; Borislav Petkov <[email protected]>; Dave Hansen
> <[email protected]>; H. Peter Anvin <[email protected]>
> Subject: RE: [EXTERNAL][PATCH] KVM: x86/xen: Update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present
>
> CAUTION: This email originated from outside of the organization. Do not click links or open
> attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Wed, Jun 22, 2022, Paul Durrant wrote:
> > The scaling information in sub-leaf 1 should match the values in the
> > 'vcpu_info' sub-structure 'time_info' (a.k.a. pvclock_vcpu_time_info) which
> > is shared with the guest. The offset values are not set since a TSC offset
> > is already applied.
> > The host TSC frequency should also be set in sub-leaf 2.
>
> Explain why this is KVM's problem, i.e. why userspace is unable to set the correct
> values.
Ok, I'll explain that there is no interface for the VMM to acquire the time_info.
>
> > This patch adds a new kvm_xen_set_cpuid() function that scans for the
>
> Please avoid "This patch".
>
> > relevant CPUID leaf when the CPUID information is updated by the VMM and
> > stashes pointers to the sub-leaves in the kvm_vcpu_xen structure.
> > The values are then updated by a call to the, also new,
> > kvm_xen_setup_tsc_info() function made at the end of
> > kvm_guest_time_update() just before entering the guest.
>
> This is not a helpful paragraph, it provides zero information that isn't obvious
> from the code.
>
> The changelog should read something like:
>
> Update Xen CPUID leaves that expose TSC frequency and scaling information
> to the guest <blah blah blah>. Cache the leaves <blah blah blah>.
>
Ok, sure.
Paul
> > Signed-off-by: Paul Durrant <[email protected]>
> > ---
> > arch/x86/include/asm/kvm_host.h | 2 ++
> > arch/x86/kvm/cpuid.c | 2 ++
> > arch/x86/kvm/x86.c | 1 +
> > arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++++
> > arch/x86/kvm/xen.h | 10 ++++++++
> > 5 files changed, 56 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 1038ccb7056a..f77a4940542f 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -638,6 +638,8 @@ struct kvm_vcpu_xen {
> > struct hrtimer timer;
> > int poll_evtchn;
> > struct timer_list poll_timer;
> > + struct kvm_cpuid_entry2 *tsc_info_1;
> > + struct kvm_cpuid_entry2 *tsc_info_2;
> > };
> >
> > struct kvm_vcpu_arch {
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index d47222ab8e6e..eb6cd88c974a 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -25,6 +25,7 @@
> > #include "mmu.h"
> > #include "trace.h"
> > #include "pmu.h"
> > +#include "xen.h"
> >
> > /*
> > * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> > @@ -310,6 +311,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > __cr4_reserved_bits(guest_cpuid_has, vcpu);
> >
> > kvm_hv_set_cpuid(vcpu);
> > + kvm_xen_set_cpuid(vcpu);
> >
> > /* Invoke the vendor callback only after the above state is updated. */
> > static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 00e23dc518e0..8b45f9975e45 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > if (vcpu->xen.vcpu_time_info_cache.active)
> > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> > + kvm_xen_setup_tsc_info(v);
>
> This can be called inside this if statement, no?
>
> if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
>
> }
>
> > return 0;
> > }
> >
> > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> > index 610beba35907..a016ff85264d 100644
> > --- a/arch/x86/kvm/xen.c
> > +++ b/arch/x86/kvm/xen.c
> > @@ -10,6 +10,9 @@
> > #include "xen.h"
> > #include "hyperv.h"
> > #include "lapic.h"
> > +#include "cpuid.h"
> > +
> > +#include <asm/xen/cpuid.h>
> >
> > #include <linux/eventfd.h>
> > #include <linux/kvm_host.h>
> > @@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
> > if (kvm->arch.xen_hvm_config.msr)
> > static_branch_slow_dec_deferred(&kvm_xen_enabled);
> > }
> > +
> > +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
>
> This is a very, very misleading name. It does not "set" anything. Given that
> this patch adds "set" and "setup", I expected the "set" to you know, set the CPUID
> leaves and the "setup" to prepar for that, not the other way around.
>
> If the leaves really do need to be cached, kvm_xen_after_set_cpuid() is probably
> the least awful name.
>
> > +{
> > + u32 base = 0;
> > + u32 function;
> > +
> > + for_each_possible_hypervisor_cpuid_base(function) {
> > + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> > +
> > + if (entry &&
> > + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> > + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> > + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> > + base = function;
> > + break;
> > + }
> > + }
> > + if (!base)
> > + return;
> > +
> > + function = base | XEN_CPUID_LEAF(3);
> > + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> > + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
>
> Is it really necessary to cache the leave? Guest CPUID isn't optimized, but it's
> not _that_ slow, and unless I'm missing something updating the TSC frequency and
> scaling info should be uncommon, i.e. not performance critical.
On Mon, Jun 27, 2022, Durrant, Paul wrote:
> > -----Original Message-----
> [snip]
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > index 00e23dc518e0..8b45f9975e45 100644
> > > > --- a/arch/x86/kvm/x86.c
> > > > +++ b/arch/x86/kvm/x86.c
> > > > @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > > > if (vcpu->xen.vcpu_time_info_cache.active)
> > > > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> > > > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> > > > + kvm_xen_setup_tsc_info(v);
> > >
> > > This can be called inside this if statement, no?
> > >
> > > if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> > >
> > > }
> > >
>
> I think it ought to be done whenever the shared copy of Xen's vcpu_info is
> updated (it will always match on real Xen) so unconditionally calling it here
> seems reasonable.
But isn't the call pointless if the vCPU's hw_tsc_khz is unchanged? E.g., if the
params were explicitly passed in, then it would look like:
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
kvm_xen_setup_tsc_info(vcpu, tgt_tsc_khz,
vcpu->hv_clock.tsc_shift,
vcpu->hv_clock.tsc_to_system_mul);
}
Explicitly passing in the arguments probably isn't necessary, just use a more
precise name, e.g. kvm_xen_update_tsc_khz(), to make it clear that the update is
limited to TSC frequency changes.
> > > > +{
> > > > + u32 base = 0;
> > > > + u32 function;
> > > > +
> > > > + for_each_possible_hypervisor_cpuid_base(function) {
> > > > + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> > > > +
> > > > + if (entry &&
> > > > + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> > > > + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> > > > + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> > > > + base = function;
> > > > + break;
> > > > + }
> > > > + }
> > > > + if (!base)
> > > > + return;
> > > > +
> > > > + function = base | XEN_CPUID_LEAF(3);
> > > > + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> > > > + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
> > >
> > > Is it really necessary to cache the leave? Guest CPUID isn't optimized, but it's
> > > not _that_ slow, and unless I'm missing something updating the TSC frequency and
> > > scaling info should be uncommon, i.e. not performance critical.
>
> If we're updating the values in the leaves on every entry into the guest (as
> with calls to kvm_setup_guest_pvclock()) then I think the cached pointers are
> worthwhile.
But why would you update on every entry to the guest? Isn't this a rare operation
if the update is limited to changes in the host CPU's TSC frequency? Or am I
missing something?
> -----Original Message-----
[snip]
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 00e23dc518e0..8b45f9975e45 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > > if (vcpu->xen.vcpu_time_info_cache.active)
> > > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> > > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> > > + kvm_xen_setup_tsc_info(v);
> >
> > This can be called inside this if statement, no?
> >
> > if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> >
> > }
> >
I think it ought to be done whenever the shared copy of Xen's vcpu_info is updated (it will always match on real Xen) so unconditionally calling it here seems reasonable.
> > > return 0;
> > > }
> > >
> > > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> > > index 610beba35907..a016ff85264d 100644
> > > --- a/arch/x86/kvm/xen.c
> > > +++ b/arch/x86/kvm/xen.c
> > > @@ -10,6 +10,9 @@
> > > #include "xen.h"
> > > #include "hyperv.h"
> > > #include "lapic.h"
> > > +#include "cpuid.h"
> > > +
> > > +#include <asm/xen/cpuid.h>
> > >
> > > #include <linux/eventfd.h>
> > > #include <linux/kvm_host.h>
> > > @@ -1855,3 +1858,41 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
> > > if (kvm->arch.xen_hvm_config.msr)
> > > static_branch_slow_dec_deferred(&kvm_xen_enabled);
> > > }
> > > +
> > > +void kvm_xen_set_cpuid(struct kvm_vcpu *vcpu)
> >
> > This is a very, very misleading name. It does not "set" anything. Given that
> > this patch adds "set" and "setup", I expected the "set" to you know, set the CPUID
> > leaves and the "setup" to prepar for that, not the other way around.
> >
> > If the leaves really do need to be cached, kvm_xen_after_set_cpuid() is probably
> > the least awful name.
> >
Ok I'll rename it kvm_xen_after_set_cpuid().
> > > +{
> > > + u32 base = 0;
> > > + u32 function;
> > > +
> > > + for_each_possible_hypervisor_cpuid_base(function) {
> > > + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> > > +
> > > + if (entry &&
> > > + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> > > + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> > > + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> > > + base = function;
> > > + break;
> > > + }
> > > + }
> > > + if (!base)
> > > + return;
> > > +
> > > + function = base | XEN_CPUID_LEAF(3);
> > > + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> > > + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
> >
> > Is it really necessary to cache the leaves? Guest CPUID isn't optimized, but it's
> > not _that_ slow, and unless I'm missing something updating the TSC frequency and
> > scaling info should be uncommon, i.e. not performance critical.
If we're updating the values in the leaves on every entry into the guest (as with calls to kvm_setup_guest_pvclock()) then I think the cached pointers are worthwhile.
Paul
> -----Original Message-----
> From: Sean Christopherson <[email protected]>
> Sent: 27 June 2022 16:52
> To: Durrant, Paul <[email protected]>
> Cc: [email protected]; [email protected]; [email protected]; Paolo Bonzini
> <[email protected]>; Vitaly Kuznetsov <[email protected]>; Wanpeng Li <[email protected]>; Jim
> Mattson <[email protected]>; Joerg Roedel <[email protected]>; Thomas Gleixner <[email protected]>;
> Ingo Molnar <[email protected]>; Borislav Petkov <[email protected]>; Dave Hansen
> <[email protected]>; H. Peter Anvin <[email protected]>
> Subject: RE: [EXTERNAL][PATCH] KVM: x86/xen: Update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present
>
> CAUTION: This email originated from outside of the organization. Do not click links or open
> attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Mon, Jun 27, 2022, Durrant, Paul wrote:
> > > -----Original Message-----
> > [snip]
> > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > > index 00e23dc518e0..8b45f9975e45 100644
> > > > > --- a/arch/x86/kvm/x86.c
> > > > > +++ b/arch/x86/kvm/x86.c
> > > > > @@ -3123,6 +3123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > > > > if (vcpu->xen.vcpu_time_info_cache.active)
> > > > > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
> > > > > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
> > > > > + kvm_xen_setup_tsc_info(v);
> > > >
> > > > This can be called inside this if statement, no?
> > > >
> > > > if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> > > >
> > > > }
> > > >
> >
> > I think it ought to be done whenever the shared copy of Xen's vcpu_info is
> > updated (it will always match on real Xen) so unconditionally calling it here
> > seems reasonable.
>
> But isn't the call pointless if the vCPU's hw_tsc_khz is unchanged? E.g. if the
> params were explicitly passed in, then it would look like:
>
> if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
> &vcpu->hv_clock.tsc_shift,
> &vcpu->hv_clock.tsc_to_system_mul);
> vcpu->hw_tsc_khz = tgt_tsc_khz;
>
> kvm_xen_setup_tsc_info(vcpu, tgt_tsc_khz,
> vcpu->hv_clock.tsc_shift,
> vcpu->hv_clock.tsc_to_system_mul);
> }
>
> Explicitly passing in the arguments probably isn't necessary, just use a more
> precise name, e.g. kvm_xen_update_tsc_khz(), to make it clear that the update is
> limited to TSC frequency changes.
>
> > > > > +{
> > > > > + u32 base = 0;
> > > > > + u32 function;
> > > > > +
> > > > > + for_each_possible_hypervisor_cpuid_base(function) {
> > > > > + struct kvm_cpuid_entry2 *entry = kvm_find_cpuid_entry(vcpu, function, 0);
> > > > > +
> > > > > + if (entry &&
> > > > > + entry->ebx == XEN_CPUID_SIGNATURE_EBX &&
> > > > > + entry->ecx == XEN_CPUID_SIGNATURE_ECX &&
> > > > > + entry->edx == XEN_CPUID_SIGNATURE_EDX) {
> > > > > + base = function;
> > > > > + break;
> > > > > + }
> > > > > + }
> > > > > + if (!base)
> > > > > + return;
> > > > > +
> > > > > + function = base | XEN_CPUID_LEAF(3);
> > > > > + vcpu->arch.xen.tsc_info_1 = kvm_find_cpuid_entry(vcpu, function, 1);
> > > > > + vcpu->arch.xen.tsc_info_2 = kvm_find_cpuid_entry(vcpu, function, 2);
> > > >
> > > > Is it really necessary to cache the leaves? Guest CPUID isn't optimized, but it's
> > > > not _that_ slow, and unless I'm missing something updating the TSC frequency and
> > > > scaling info should be uncommon, i.e. not performance critical.
> >
> > If we're updating the values in the leaves on every entry into the guest (as
> > with calls to kvm_setup_guest_pvclock()) then I think the cached pointers are
> > worthwhile.
>
> But why would you update on every entry to the guest? Isn't this a rare operation
> if the update is limited to changes in the host CPU's TSC frequency? Or am I
> missing something?
No, I was forgetting that there is no longer an offset to update (there once was), so the values will indeed only change if the frequency changes... so I'll drop the caching.
Paul