From: Isaku Yamahata <[email protected]>
KVM/x86 uses the user return notifier to switch MSRs between guest and host
values. It snapshots the host values on CPU online, changes the MSR values
for the guest, and restores them on return to user space. The current code
abuses kvm_arch_hardware_enable(), which is called on KVM module
initialization or CPU online, to take that snapshot.
Remove this abuse of kvm_arch_hardware_enable() by capturing the host value
the first time an MSR is changed to its guest value instead of at CPU
online.
Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++-------------------
1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 205ebdc2b11b..16104a2f7d8e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -200,6 +200,7 @@ struct kvm_user_return_msrs {
struct kvm_user_return_msr_values {
u64 host;
u64 curr;
+ bool initialized;
} values[KVM_MAX_NR_USER_RETURN_MSRS];
};
@@ -363,6 +364,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
local_irq_restore(flags);
for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
values = &msrs->values[slot];
+ /*
+ * No need to check values->initialized because host = curr = 0
+ * by __GFP_ZERO when !values->initialized.
+ */
if (values->host != values->curr) {
wrmsrl(kvm_uret_msrs_list[slot], values->host);
values->curr = values->host;
@@ -409,34 +414,30 @@ int kvm_find_user_return_msr(u32 msr)
}
EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
-static void kvm_user_return_msr_cpu_online(void)
-{
- unsigned int cpu = smp_processor_id();
- struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
- u64 value;
- int i;
-
- for (i = 0; i < kvm_nr_uret_msrs; ++i) {
- rdmsrl_safe(kvm_uret_msrs_list[i], &value);
- msrs->values[i].host = value;
- msrs->values[i].curr = value;
- }
-}
-
int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+ struct kvm_user_return_msr_values *values = &msrs->values[slot];
int err;
- value = (value & mask) | (msrs->values[slot].host & ~mask);
- if (value == msrs->values[slot].curr)
+ if (unlikely(!values->initialized)) {
+ u64 host_value;
+
+ rdmsrl_safe(kvm_uret_msrs_list[slot], &host_value);
+ values->host = host_value;
+ values->curr = host_value;
+ values->initialized = true;
+ }
+
+ value = (value & mask) | (values->host & ~mask);
+ if (value == values->curr)
return 0;
err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
if (err)
return 1;
- msrs->values[slot].curr = value;
+ values->curr = value;
if (!msrs->registered) {
msrs->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&msrs->urn);
@@ -9212,7 +9213,12 @@ int kvm_arch_init(void *opaque)
return -ENOMEM;
}
- user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
+ /*
+ * __GFP_ZERO to ensure user_return_msrs.values[].{host, curr} match.
+ * See kvm_on_user_return()
+ */
+ user_return_msrs = alloc_percpu_gfp(struct kvm_user_return_msrs,
+ GFP_KERNEL | __GFP_ZERO);
if (!user_return_msrs) {
printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
r = -ENOMEM;
@@ -11836,7 +11842,6 @@ int kvm_arch_hardware_enable(void)
u64 max_tsc = 0;
bool stable, backwards_tsc = false;
- kvm_user_return_msr_cpu_online();
ret = static_call(kvm_x86_hardware_enable)();
if (ret != 0)
return ret;
--
2.25.1
On Tue, Aug 30, 2022 at 05:01:16AM -0700, [email protected] wrote:
>From: Isaku Yamahata <[email protected]>
>
>KVM/X86 uses user return notifier to switch MSR for guest or user space.
>Snapshot host values on CPU online, change MSR values for guest, and
>restore them on returning to user space. The current code abuses
>kvm_arch_hardware_enable() which is called on kvm module initialization or
>CPU online.
>
>Remove such the abuse of kvm_arch_hardware_enable by capturing the host
>value on the first change of the MSR value to guest VM instead of CPU
>online.
>
>Suggested-by: Sean Christopherson <[email protected]>
>Signed-off-by: Isaku Yamahata <[email protected]>
>---
> arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++-------------------
> 1 file changed, 24 insertions(+), 19 deletions(-)
>
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 205ebdc2b11b..16104a2f7d8e 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -200,6 +200,7 @@ struct kvm_user_return_msrs {
> struct kvm_user_return_msr_values {
> u64 host;
> u64 curr;
>+ bool initialized;
> } values[KVM_MAX_NR_USER_RETURN_MSRS];
The benefit of having an "initialized" state for each user return MSR on
each CPU is small. A per-CPU state looks sufficient. With it, you can keep
kvm_user_return_msr_cpu_online() and simply call the function from
kvm_set_user_return_msr() if initialized is false on the current CPU.
On Thu, Sep 01, 2022, Chao Gao wrote:
> On Tue, Aug 30, 2022 at 05:01:16AM -0700, [email protected] wrote:
> >From: Isaku Yamahata <[email protected]>
> >
> >KVM/X86 uses user return notifier to switch MSR for guest or user space.
> >Snapshot host values on CPU online, change MSR values for guest, and
> >restore them on returning to user space. The current code abuses
> >kvm_arch_hardware_enable() which is called on kvm module initialization or
> >CPU online.
> >
> >Remove such the abuse of kvm_arch_hardware_enable by capturing the host
> >value on the first change of the MSR value to guest VM instead of CPU
> >online.
> >
> >Suggested-by: Sean Christopherson <[email protected]>
> >Signed-off-by: Isaku Yamahata <[email protected]>
> >---
> > arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++-------------------
> > 1 file changed, 24 insertions(+), 19 deletions(-)
> >
> >diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >index 205ebdc2b11b..16104a2f7d8e 100644
> >--- a/arch/x86/kvm/x86.c
> >+++ b/arch/x86/kvm/x86.c
> >@@ -200,6 +200,7 @@ struct kvm_user_return_msrs {
> > struct kvm_user_return_msr_values {
> > u64 host;
> > u64 curr;
> >+ bool initialized;
> > } values[KVM_MAX_NR_USER_RETURN_MSRS];
>
> The benefit of having an "initialized" state for each user return MSR on
> each CPU is small. A per-cpu state looks suffice. With it, you can keep
> kvm_user_return_msr_cpu_online() and simply call the function from
> kvm_set_user_return_msr() if initialized is false on current CPU.
Yep, a per-CPU flag is what I intended. This is the completely untested patch that's
sitting in a development branch of mine.
---
arch/x86/kvm/x86.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eca76f187e4b..1328326acfae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -194,6 +194,7 @@ module_param(eager_page_split, bool, 0644);
struct kvm_user_return_msrs {
struct user_return_notifier urn;
+ bool initialized;
bool registered;
struct kvm_user_return_msr_values {
u64 host;
@@ -400,18 +401,20 @@ int kvm_find_user_return_msr(u32 msr)
return -1;
}
-static void kvm_user_return_msr_cpu_online(void)
+static void kvm_user_return_msr_init_cpu(struct kvm_user_return_msrs *msrs)
{
- unsigned int cpu = smp_processor_id();
- struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
u64 value;
int i;
+ if (msrs->initialized)
+ return;
+
for (i = 0; i < kvm_nr_uret_msrs; ++i) {
rdmsrl_safe(kvm_uret_msrs_list[i], &value);
msrs->values[i].host = value;
msrs->values[i].curr = value;
}
+ msrs->initialized = true;
}
int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
@@ -420,6 +423,8 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
int err;
+ kvm_user_return_msr_init_cpu(msrs);
+
value = (value & mask) | (msrs->values[slot].host & ~mask);
if (value == msrs->values[slot].curr)
return 0;
@@ -11740,7 +11745,6 @@ int kvm_arch_hardware_enable(void)
u64 max_tsc = 0;
bool stable, backwards_tsc = false;
- kvm_user_return_msr_cpu_online();
ret = static_call(kvm_x86_hardware_enable)();
if (ret != 0)
return ret;
base-commit: a8f21d1980fbd7e877ed174142f7f572d547e611
--
On Thu, Sep 01, 2022 at 02:12:56PM +0000,
Sean Christopherson <[email protected]> wrote:
> On Thu, Sep 01, 2022, Chao Gao wrote:
> > On Tue, Aug 30, 2022 at 05:01:16AM -0700, [email protected] wrote:
> > >From: Isaku Yamahata <[email protected]>
> > >
> > >KVM/X86 uses user return notifier to switch MSR for guest or user space.
> > >Snapshot host values on CPU online, change MSR values for guest, and
> > >restore them on returning to user space. The current code abuses
> > >kvm_arch_hardware_enable() which is called on kvm module initialization or
> > >CPU online.
> > >
> > >Remove such the abuse of kvm_arch_hardware_enable by capturing the host
> > >value on the first change of the MSR value to guest VM instead of CPU
> > >online.
> > >
> > >Suggested-by: Sean Christopherson <[email protected]>
> > >Signed-off-by: Isaku Yamahata <[email protected]>
> > >---
> > > arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++-------------------
> > > 1 file changed, 24 insertions(+), 19 deletions(-)
> > >
> > >diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > >index 205ebdc2b11b..16104a2f7d8e 100644
> > >--- a/arch/x86/kvm/x86.c
> > >+++ b/arch/x86/kvm/x86.c
> > >@@ -200,6 +200,7 @@ struct kvm_user_return_msrs {
> > > struct kvm_user_return_msr_values {
> > > u64 host;
> > > u64 curr;
> > >+ bool initialized;
> > > } values[KVM_MAX_NR_USER_RETURN_MSRS];
> >
> > The benefit of having an "initialized" state for each user return MSR on
> > each CPU is small. A per-cpu state looks suffice. With it, you can keep
> > kvm_user_return_msr_cpu_online() and simply call the function from
> > kvm_set_user_return_msr() if initialized is false on current CPU.
>
> Yep, a per-CPU flag is I intended. This is the completely untested patch that's
> sitting in a development branch of mine.
With the following fix, it worked. I'll replace this patch with yours.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 205ebdc2b11b..0e200fe44b35 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9212,7 +9217,12 @@ int kvm_arch_init(void *opaque)
return -ENOMEM;
}
- user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
+ /*
+ * __GFP_ZERO to ensure user_return_msrs.initialized = false.
+ * See kvm_user_return_msr_init_cpu().
+ */
+ user_return_msrs = alloc_percpu_gfp(struct kvm_user_return_msrs,
+ GFP_KERNEL | __GFP_ZERO);
if (!user_return_msrs) {
printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
r = -ENOMEM;
--
Isaku Yamahata <[email protected]>