Snapshot the host's MSR_IA32_ARCH_CAPABILITIES to avoid repeated RDMSRs
at runtime, and clean up the pseudo-cache vmx_fb_clear_ctrl_available.
Sean Christopherson (2):
KVM: x86: Snapshot host's MSR_IA32_ARCH_CAPABILITIES
KVM: VMX: Drop unnecessary vmx_fb_clear_ctrl_available "cache"
arch/x86/kvm/vmx/vmx.c | 33 ++++++---------------------------
arch/x86/kvm/x86.c | 13 +++++++------
arch/x86/kvm/x86.h | 1 +
3 files changed, 14 insertions(+), 33 deletions(-)
base-commit: 02f1b0b736606f9870595b3089d9c124f9da8be9
--
2.41.0.162.gfafddb0af9-goog
Snapshot the host's MSR_IA32_ARCH_CAPABILITIES, if it's supported, instead
of reading the MSR every time KVM wants to query the host state, e.g. when
initializing the default value during vCPU creation. The paths that query
ARCH_CAPABILITIES aren't particularly performance sensitive, but creating
vCPUs is a frequent enough operation that burning 8 bytes is a good
trade-off.
Alternatively, KVM could add a field in kvm_caps and thus skip the
on-demand calculations entirely, but a pure snapshot isn't possible due to
the way KVM handles the l1tf_vmx_mitigation module param. And unlike the
other "supported" fields in kvm_caps, KVM doesn't enforce the "supported"
value, i.e. KVM treats ARCH_CAPABILITIES like a CPUID leaf and lets
userspace advertise whatever it wants. Those problems are solvable, but
it's not clear there is real benefit versus snapshotting the host value,
and grabbing the host value will allow additional cleanup of KVM's
FB_CLEAR_CTRL code.
Link: https://lore.kernel.org/all/[email protected]
Cc: Chao Gao <[email protected]>
Cc: Xiaoyao Li <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 22 ++++++----------------
arch/x86/kvm/x86.c | 13 +++++++------
arch/x86/kvm/x86.h | 1 +
3 files changed, 14 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2d9d155691a7..42d1148f933c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -255,14 +255,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
return 0;
}
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
- u64 msr;
-
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
- if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
- return 0;
- }
+ if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+ return 0;
}
/* If set to auto use the default l1tf mitigation method */
@@ -373,15 +368,10 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
static void vmx_setup_fb_clear_ctrl(void)
{
- u64 msr;
-
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
+ if ((host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
- !boot_cpu_has_bug(X86_BUG_TAA)) {
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
- if (msr & ARCH_CAP_FB_CLEAR_CTRL)
- vmx_fb_clear_ctrl_available = true;
- }
+ !boot_cpu_has_bug(X86_BUG_TAA))
+ vmx_fb_clear_ctrl_available = true;
}
static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7c7be4815eaa..7c2e796fa460 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -237,6 +237,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
u64 __read_mostly host_xss;
EXPORT_SYMBOL_GPL(host_xss);
+u64 __read_mostly host_arch_capabilities;
+EXPORT_SYMBOL_GPL(host_arch_capabilities);
+
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -1612,12 +1615,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
static u64 kvm_get_arch_capabilities(void)
{
- u64 data = 0;
-
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
- data &= KVM_SUPPORTED_ARCH_CAP;
- }
+ u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -9492,6 +9490,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_init_pmu_capability(ops->pmu_ops);
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+
r = ops->hardware_setup();
if (r != 0)
goto out_mmu_exit;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 82e3dafc5453..1e7be1f6ab29 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
extern u64 host_xcr0;
extern u64 host_xss;
+extern u64 host_arch_capabilities;
extern struct kvm_caps kvm_caps;
--
2.41.0.162.gfafddb0af9-goog
Now that KVM snapshots the host's MSR_IA32_ARCH_CAPABILITIES, drop the
similar snapshot/cache of whether or not KVM is allowed to manipulate
MSR_IA32_MCU_OPT_CTRL.FB_CLEAR_DIS. The motivation for the cache was
presumably to avoid the RDMSR, e.g. boot_cpu_has_bug() is quite cheap, and
modifying the vCPU's MSR_IA32_ARCH_CAPABILITIES is an infrequent operation
and a relatively slow path.
Cc: Pawan Gupta <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 17 +++--------------
1 file changed, 3 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 42d1148f933c..17003660138a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -237,9 +237,6 @@ static const struct {
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
-/* Control for disabling CPU Fill buffer clear */
-static bool __read_mostly vmx_fb_clear_ctrl_available;
-
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
@@ -366,14 +363,6 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
-static void vmx_setup_fb_clear_ctrl(void)
-{
- if ((host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
- !boot_cpu_has_bug(X86_BUG_MDS) &&
- !boot_cpu_has_bug(X86_BUG_TAA))
- vmx_fb_clear_ctrl_available = true;
-}
-
static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
{
u64 msr;
@@ -399,7 +388,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+ vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA);
/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -8580,8 +8571,6 @@ static int __init vmx_init(void)
if (r)
goto err_l1d_flush;
- vmx_setup_fb_clear_ctrl();
-
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
--
2.41.0.162.gfafddb0af9-goog
On Tue, Jun 06, 2023 at 05:43:10PM -0700, Sean Christopherson wrote:
> Now that KVM snapshots the host's MSR_IA32_ARCH_CAPABILITIES, drop the
> similar snapshot/cache of whether or not KVM is allowed to manipulate
> ARCH_CAPABILITIES.FB_CLEAR_CTRL. The motivation for the cache was
FB_CLEAR_CTRL is a read-only bit, I think you mean
MSR_IA32_MCU_OPT_CTRL.FB_CLEAR_DIS.
> presumably to avoid the RDMSR, e.g. boot_cpu_has_bug() is quite cheap, and
> modifying the vCPU's MSR_IA32_ARCH_CAPABILITIES is an infrequent option
> and a relatively slow path.
>
> Cc: Pawan Gupta <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
LGTM.
Reviewed-by: Pawan Gupta <[email protected]>
On 6/7/2023 8:43 AM, Sean Christopherson wrote:
> Snapshot the host's MSR_IA32_ARCH_CAPABILITIES, if it's supported, instead
> of reading the MSR every time KVM wants to query the host state, e.g. when
> initializing the default value during vCPU creation. The paths that query
> ARCH_CAPABILITIES aren't particularly performance sensitive, but creating
> vCPUs is a frequent enough operation that burning 8 bytes is a good
> trade-off.
>
> Alternatively, KVM could add a field in kvm_caps and thus skip the
> on-demand calculations entirely, but a pure snapshot isn't possible due to
> the way KVM handles the l1tf_vmx_mitigation module param. And unlike the
> other "supported" fields in kvm_caps, KVM doesn't enforce the "supported"
> value, i.e. KVM treats ARCH_CAPABILITIES like a CPUID leaf and lets
> userspace advertise whatever it wants. Those problems are solvable, but
> it's not clear there is real benefit versus snapshotting the host value,
> and grabbing the host value will allow additional cleanup of KVM's
> FB_CLEAR_CTRL code.
Reviewed-by: Xiaoyao Li <[email protected]>
> Link: https://lore.kernel.org/all/[email protected]
> Cc: Chao Gao <[email protected]>
> Cc: Xiaoyao Li <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/kvm/vmx/vmx.c | 22 ++++++----------------
> arch/x86/kvm/x86.c | 13 +++++++------
> arch/x86/kvm/x86.h | 1 +
> 3 files changed, 14 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2d9d155691a7..42d1148f933c 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -255,14 +255,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
> return 0;
> }
>
> - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
> - u64 msr;
> -
> - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
> - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
> - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
> - return 0;
> - }
> + if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
> + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
> + return 0;
> }
>
> /* If set to auto use the default l1tf mitigation method */
> @@ -373,15 +368,10 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
>
> static void vmx_setup_fb_clear_ctrl(void)
> {
> - u64 msr;
> -
> - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
> + if ((host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
> !boot_cpu_has_bug(X86_BUG_MDS) &&
> - !boot_cpu_has_bug(X86_BUG_TAA)) {
> - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
> - if (msr & ARCH_CAP_FB_CLEAR_CTRL)
> - vmx_fb_clear_ctrl_available = true;
> - }
> + !boot_cpu_has_bug(X86_BUG_TAA))
> + vmx_fb_clear_ctrl_available = true;
> }
>
> static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7c7be4815eaa..7c2e796fa460 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -237,6 +237,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
> u64 __read_mostly host_xss;
> EXPORT_SYMBOL_GPL(host_xss);
>
> +u64 __read_mostly host_arch_capabilities;
> +EXPORT_SYMBOL_GPL(host_arch_capabilities);
> +
> const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
> KVM_GENERIC_VM_STATS(),
> STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
> @@ -1612,12 +1615,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
>
> static u64 kvm_get_arch_capabilities(void)
> {
> - u64 data = 0;
> -
> - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
> - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
> - data &= KVM_SUPPORTED_ARCH_CAP;
> - }
> + u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
>
> /*
> * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
> @@ -9492,6 +9490,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
>
> kvm_init_pmu_capability(ops->pmu_ops);
>
> + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
> + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
> +
> r = ops->hardware_setup();
> if (r != 0)
> goto out_mmu_exit;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 82e3dafc5453..1e7be1f6ab29 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
>
> extern u64 host_xcr0;
> extern u64 host_xss;
> +extern u64 host_arch_capabilities;
>
> extern struct kvm_caps kvm_caps;
>
On 6/7/2023 8:43 AM, Sean Christopherson wrote:
> Now that KVM snapshots the host's MSR_IA32_ARCH_CAPABILITIES, drop the
> similar snapshot/cache of whether or not KVM is allowed to manipulate
> ARCH_CAPABILITIES.FB_CLEAR_CTRL. The motivation for the cache was
> presumably to avoid the RDMSR, e.g. boot_cpu_has_bug() is quite cheap, and
> modifying the vCPU's MSR_IA32_ARCH_CAPABILITIES is an infrequent option
> and a relatively slow path.
Reviewed-by: Xiaoyao Li <[email protected]>
> Cc: Pawan Gupta <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/kvm/vmx/vmx.c | 17 +++--------------
> 1 file changed, 3 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 42d1148f933c..17003660138a 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -237,9 +237,6 @@ static const struct {
> #define L1D_CACHE_ORDER 4
> static void *vmx_l1d_flush_pages;
>
> -/* Control for disabling CPU Fill buffer clear */
> -static bool __read_mostly vmx_fb_clear_ctrl_available;
> -
> static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
> {
> struct page *page;
> @@ -366,14 +363,6 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
> return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
> }
>
> -static void vmx_setup_fb_clear_ctrl(void)
> -{
> - if ((host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
> - !boot_cpu_has_bug(X86_BUG_MDS) &&
> - !boot_cpu_has_bug(X86_BUG_TAA))
> - vmx_fb_clear_ctrl_available = true;
> -}
> -
> static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
> {
> u64 msr;
> @@ -399,7 +388,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
>
> static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
> {
> - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
> + vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
> + !boot_cpu_has_bug(X86_BUG_MDS) &&
> + !boot_cpu_has_bug(X86_BUG_TAA);
>
> /*
> * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
> @@ -8580,8 +8571,6 @@ static int __init vmx_init(void)
> if (r)
> goto err_l1d_flush;
>
> - vmx_setup_fb_clear_ctrl();
> -
> for_each_possible_cpu(cpu) {
> INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
>
On Tue, Jun 06, 2023 at 05:43:09PM -0700, Sean Christopherson wrote:
>Snapshot the host's MSR_IA32_ARCH_CAPABILITIES, if it's supported, instead
>of reading the MSR every time KVM wants to query the host state, e.g. when
>initializing the default value during vCPU creation. The paths that query
>ARCH_CAPABILITIES aren't particularly performance sensitive, but creating
>vCPUs is a frequent enough operation that burning 8 bytes is a good
>trade-off.
>
>Alternatively, KVM could add a field in kvm_caps and thus skip the
>on-demand calculations entirely, but a pure snapshot isn't possible due to
>the way KVM handles the l1tf_vmx_mitigation module param. And unlike the
>other "supported" fields in kvm_caps, KVM doesn't enforce the "supported"
>value, i.e. KVM treats ARCH_CAPABILITIES like a CPUID leaf and lets
>userspace advertise whatever it wants. Those problems are solvable, but
>it's not clear there is real benefit versus snapshotting the host value,
>and grabbing the host value will allow additional cleanup of KVM's
>FB_CLEAR_CTRL code.
>
>Link: https://lore.kernel.org/all/[email protected]
>Cc: Chao Gao <[email protected]>
>Cc: Xiaoyao Li <[email protected]>
>Signed-off-by: Sean Christopherson <[email protected]>
Reviewed-by: Chao Gao <[email protected]>
On Tue, 06 Jun 2023 17:43:08 -0700, Sean Christopherson wrote:
> Snapshot the host's MSR_IA32_ARCH_CAPABILITIES to avoid repeated RDMSRs
> at runtime, and cleanup the pseudo-cache vmx_fb_clear_ctrl_available.
>
> Sean Christopherson (2):
> KVM: x86: Snapshot host's MSR_IA32_ARCH_CAPABILITIES
> KVM: VMX: Drop unnecessary vmx_fb_clear_ctrl_available "cache"
>
> [...]
Applied to kvm-x86 misc, with a fixup for the ARCH_CAPABILITIES.FB_CLEAR_CTRL vs.
MSR_IA32_MCU_OPT_CTRL.FB_CLEAR_DIS confusion in patch 2's changelog.
Thanks!
[1/2] KVM: x86: Snapshot host's MSR_IA32_ARCH_CAPABILITIES
https://github.com/kvm-x86/linux/commit/a2fd5d02bad6
[2/2] KVM: VMX: Drop unnecessary vmx_fb_clear_ctrl_available "cache"
https://github.com/kvm-x86/linux/commit/550ba57faa04
--
https://github.com/kvm-x86/linux/tree/next
https://github.com/kvm-x86/linux/tree/fixes