From: Zhang Chen <[email protected]>
Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
as the first feature in the leaf.
RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
(bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
after a non-zero is written to the MSR. Therefore, guests can already
toggle the two bits if the host supports RRSBA_CTRL, and no extra code
is needed to allow guests to toggle the two bits.
Signed-off-by: Zhang Chen <[email protected]>
Signed-off-by: Chao Gao <[email protected]>
Tested-by: Jiaan Lu <[email protected]>
---
arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
arch/x86/kvm/reverse_cpuid.h | 7 +++++++
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9583a110cf5f..f024c3ac2203 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
);
+ kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
+ SF(RRSBA_CTRL)
+ );
+
kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
@@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* function 7 has additional index. */
case 7:
- entry->eax = min(entry->eax, 1u);
+ entry->eax = min(entry->eax, 2u);
cpuid_entry_override(entry, CPUID_7_0_EBX);
cpuid_entry_override(entry, CPUID_7_ECX);
cpuid_entry_override(entry, CPUID_7_EDX);
- /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
- if (entry->eax == 1) {
+ max_idx = entry->eax;
+
+ if (max_idx >= 1) {
entry = do_host_cpuid(array, function, 1);
if (!entry)
goto out;
@@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = 0;
entry->ecx = 0;
}
+
+ if (max_idx >= 2) {
+ entry = do_host_cpuid(array, function, 2);
+ if (!entry)
+ goto out;
+
+ cpuid_entry_override(entry, CPUID_7_2_EDX);
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ }
break;
case 0xa: { /* Architectural Performance Monitoring */
union cpuid10_eax eax;
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a5717282bb9c..72bad8314a9c 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
+ CPUID_7_2_EDX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs {
/* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
+/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
+#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
[CPUID_7_EDX] = { 7, 0, CPUID_EDX},
[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
+ [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
[CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
@@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
return KVM_X86_FEATURE_SGX_EDECCSSA;
else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
return KVM_X86_FEATURE_CONSTANT_TSC;
+ else if (x86_feature == X86_FEATURE_RRSBA_CTRL)
+ return KVM_X86_FEATURE_RRSBA_CTRL;
return x86_feature;
}
--
2.40.0
On 4/14/2023 2:25 PM, Chao Gao wrote:
> From: Zhang Chen <[email protected]>
>
> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
> as the first feature in the leaf.
>
> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
>
> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
> after a non-zero is written to the MSR. Therefore, guests can already
> toggle the two bits if the host supports RRSBA_CTRL, and no extra code
> is needed to allow guests to toggle the two bits.
>
> Signed-off-by: Zhang Chen <[email protected]>
> Signed-off-by: Chao Gao <[email protected]>
> Tested-by: Jiaan Lu <[email protected]>
> ---
> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
> arch/x86/kvm/reverse_cpuid.h | 7 +++++++
> 2 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 9583a110cf5f..f024c3ac2203 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
> );
>
> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
> + SF(RRSBA_CTRL)
> + );
> +
Is it slightly better to put the new added one after
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, ...
to make it in order?
> kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
> F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
> F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
> @@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> break;
> /* function 7 has additional index. */
> case 7:
> - entry->eax = min(entry->eax, 1u);
> + entry->eax = min(entry->eax, 2u);
> cpuid_entry_override(entry, CPUID_7_0_EBX);
> cpuid_entry_override(entry, CPUID_7_ECX);
> cpuid_entry_override(entry, CPUID_7_EDX);
>
> - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
> - if (entry->eax == 1) {
> + max_idx = entry->eax;
> +
> + if (max_idx >= 1) {
> entry = do_host_cpuid(array, function, 1);
> if (!entry)
> goto out;
> @@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> entry->ebx = 0;
> entry->ecx = 0;
> }
> +
> + if (max_idx >= 2) {
> + entry = do_host_cpuid(array, function, 2);
> + if (!entry)
> + goto out;
> +
> + cpuid_entry_override(entry, CPUID_7_2_EDX);
> + entry->eax = 0;
> + entry->ebx = 0;
> + entry->ecx = 0;
> + }
> break;
> case 0xa: { /* Architectural Performance Monitoring */
> union cpuid10_eax eax;
> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
> index a5717282bb9c..72bad8314a9c 100644
> --- a/arch/x86/kvm/reverse_cpuid.h
> +++ b/arch/x86/kvm/reverse_cpuid.h
> @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs {
> CPUID_12_EAX = NCAPINTS,
> CPUID_7_1_EDX,
> CPUID_8000_0007_EDX,
> + CPUID_7_2_EDX,
> NR_KVM_CPU_CAPS,
>
> NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
> @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs {
> /* CPUID level 0x80000007 (EDX). */
> #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
>
> +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
> +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
> +
> struct cpuid_reg {
> u32 function;
> u32 index;
> @@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
> [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
> + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
> [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
> [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
> [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
> @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
> return KVM_X86_FEATURE_SGX_EDECCSSA;
> else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
> return KVM_X86_FEATURE_CONSTANT_TSC;
> + else if (x86_feature == X86_FEATURE_RRSBA_CTRL)
> + return KVM_X86_FEATURE_RRSBA_CTRL;
>
> return x86_feature;
> }
On Sun, Apr 16, 2023 at 03:04:59PM +0800, Binbin Wu wrote:
>
>On 4/14/2023 2:25 PM, Chao Gao wrote:
>> From: Zhang Chen <[email protected]>
>>
>> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
>> as the first feature in the leaf.
>>
>> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
>> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
>> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
>>
>> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
>> after a non-zero is written to the MSR. Therefore, guests can already
>> toggle the two bits if the host supports RRSBA_CTRL, and no extra code
>> is needed to allow guests to toggle the two bits.
>>
>> Signed-off-by: Zhang Chen <[email protected]>
>> Signed-off-by: Chao Gao <[email protected]>
>> Tested-by: Jiaan Lu <[email protected]>
>> ---
>> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
>> arch/x86/kvm/reverse_cpuid.h | 7 +++++++
>> 2 files changed, 26 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 9583a110cf5f..f024c3ac2203 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
>> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
>> );
>> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
>> + SF(RRSBA_CTRL)
>> + );
>> +
>
>Is it slightly better to put the new added one after
>
>??? kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, ...
>
>to make it in order?
Yes. Will do.
On 4/14/2023 2:25 PM, Chao Gao wrote:
> From: Zhang Chen <[email protected]>
>
> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
> as the first feature in the leaf.
>
> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
>
> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
> after a non-zero is written to the MSR. Therefore, guests can already
> toggle the two bits if the host supports RRSBA_CTRL, and no extra code
> is needed to allow guests to toggle the two bits.
This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL
which has a dedicated enumeration CPUID bit and no support in KVM yet.
I think we need to fix this bug at first.
> Signed-off-by: Zhang Chen <[email protected]>
> Signed-off-by: Chao Gao <[email protected]>
> Tested-by: Jiaan Lu <[email protected]>
> ---
> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
> arch/x86/kvm/reverse_cpuid.h | 7 +++++++
> 2 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 9583a110cf5f..f024c3ac2203 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
> );
>
> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
> + SF(RRSBA_CTRL)
> + );
> +
Please move this hook up to right follow the leaf CPUID_7_1_EAX.
> kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
> F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
> F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
> @@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> break;
> /* function 7 has additional index. */
> case 7:
> - entry->eax = min(entry->eax, 1u);
> + entry->eax = min(entry->eax, 2u);
> cpuid_entry_override(entry, CPUID_7_0_EBX);
> cpuid_entry_override(entry, CPUID_7_ECX);
> cpuid_entry_override(entry, CPUID_7_EDX);
>
> - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
> - if (entry->eax == 1) {
> + max_idx = entry->eax;
> +
> + if (max_idx >= 1) {
> entry = do_host_cpuid(array, function, 1);
> if (!entry)
> goto out;
> @@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> entry->ebx = 0;
> entry->ecx = 0;
> }
> +
> + if (max_idx >= 2) {
> + entry = do_host_cpuid(array, function, 2);
> + if (!entry)
> + goto out;
> +
> + cpuid_entry_override(entry, CPUID_7_2_EDX);
> + entry->eax = 0;
> + entry->ebx = 0;
> + entry->ecx = 0;
> + }
> break;
> case 0xa: { /* Architectural Performance Monitoring */
> union cpuid10_eax eax;
> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
> index a5717282bb9c..72bad8314a9c 100644
> --- a/arch/x86/kvm/reverse_cpuid.h
> +++ b/arch/x86/kvm/reverse_cpuid.h
> @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs {
> CPUID_12_EAX = NCAPINTS,
> CPUID_7_1_EDX,
> CPUID_8000_0007_EDX,
> + CPUID_7_2_EDX,
> NR_KVM_CPU_CAPS,
>
> NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
> @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs {
> /* CPUID level 0x80000007 (EDX). */
> #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
>
> +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
> +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
> +
> struct cpuid_reg {
> u32 function;
> u32 index;
> @@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
> [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
> + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
> [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
> [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
> [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
> @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
> return KVM_X86_FEATURE_SGX_EDECCSSA;
> else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
> return KVM_X86_FEATURE_CONSTANT_TSC;
> + else if (x86_feature == X86_FEATURE_RRSBA_CTRL)
> + return KVM_X86_FEATURE_RRSBA_CTRL;
>
> return x86_feature;
> }
On Mon, May 15, 2023 at 02:53:07PM +0800, Xiaoyao Li wrote:
>On 4/14/2023 2:25 PM, Chao Gao wrote:
>> From: Zhang Chen <[email protected]>
>>
>> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
>> as the first feature in the leaf.
>>
>> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
>> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
>> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
>>
>> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
>> after a non-zero is written to the MSR. Therefore, guests can already
>> toggle the two bits if the host supports RRSBA_CTRL, and no extra code
>> is needed to allow guests to toggle the two bits.
>
>This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL which
>has a dedicated enumeration CPUID bit and no support in KVM yet.
Do you mean passing through the MSR is a bug? guest can write any hardware
supported value to the MSR if the MSR isn't intercepted.
I guess this is intentional and a trade-off for performance (note that
context-switch may cause writes to the MSR). And see
commit 841c2be09fe4 ("kvm: x86: replace kvm_spec_ctrl_test_value with runtime test on the host")
it appears that this behavior is widely recognized.
>
>I think we need to fix this bug at first.
I have no idea how to fix the "bug" without intercepting the MSR. The
performance penalty makes me think intercepting the MSR is not a viable
solution.
>
>> Signed-off-by: Zhang Chen <[email protected]>
>> Signed-off-by: Chao Gao <[email protected]>
>> Tested-by: Jiaan Lu <[email protected]>
>> ---
>> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
>> arch/x86/kvm/reverse_cpuid.h | 7 +++++++
>> 2 files changed, 26 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 9583a110cf5f..f024c3ac2203 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
>> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
>> );
>> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
>> + SF(RRSBA_CTRL)
>> + );
>> +
>
>Please move this hook up to right follow the leaf CPUID_7_1_EAX.
sure. will do.
On 5/16/2023 10:04 AM, Chao Gao wrote:
> On Mon, May 15, 2023 at 02:53:07PM +0800, Xiaoyao Li wrote:
>> On 4/14/2023 2:25 PM, Chao Gao wrote:
>>> From: Zhang Chen <[email protected]>
>>>
>>> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL
>>> as the first feature in the leaf.
>>>
>>> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U
>>> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to
>>> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively.
>>>
>>> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses
>>> after a non-zero is written to the MSR. Therefore, guests can already
>>> toggle the two bits if the host supports RRSBA_CTRL, and no extra code
>>> is needed to allow guests to toggle the two bits.
>>
>> This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL which
>> has a dedicated enumeration CPUID bit and no support in KVM yet.
>
> Do you mean passing through the MSR is a bug? guest can write any hardware
> supported value to the MSR if the MSR isn't intercepted.
>
> I guess this is intentional and a trade-off for performance (note that
> context-switch may cause writes to the MSR). And see
>
> commit 841c2be09fe4 ("kvm: x86: replace kvm_spec_ctrl_test_value with runtime test on the host")
>
> it appears that this behavior is widely recognized.
>
>>
>> I think we need to fix this bug at first.
>
> I have no idea how to fix the "bug" without intercepting the MSR. The
> performance penalty makes me think intercepting the MSR is not a viable
> solution.
I thought correctness always takes higher priority over performance.
>>
>>> Signed-off-by: Zhang Chen <[email protected]>
>>> Signed-off-by: Chao Gao <[email protected]>
>>> Tested-by: Jiaan Lu <[email protected]>
>>> ---
>>> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++---
>>> arch/x86/kvm/reverse_cpuid.h | 7 +++++++
>>> 2 files changed, 26 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index 9583a110cf5f..f024c3ac2203 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void)
>>> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
>>> );
>>> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
>>> + SF(RRSBA_CTRL)
>>> + );
>>> +
>>
>> Please move this hook up to right follow the leaf CPUID_7_1_EAX.
>
> sure. will do.
On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote:
>> > I think we need to fix this bug at first.
>>
>> I have no idea how to fix the "bug" without intercepting the MSR. The
>> performance penalty makes me think intercepting the MSR is not a viable
>> solution.
>
>I thought correctness always takes higher priority over performance.
It is generally true. however, there are situations where we should make
trade-offs between correctness and other factors (like performance):
E.g., instructions without control bits, to be 100% compliant with CPU
spec, in theory, VMMs can trap/decode every instruction and inject #UD
if a guest tries to use some instructions it shouldn't.
On 5/16/2023 11:01 AM, Chao Gao wrote:
> On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote:
>>>> I think we need to fix this bug at first.
>>>
>>> I have no idea how to fix the "bug" without intercepting the MSR. The
>>> performance penalty makes me think intercepting the MSR is not a viable
>>> solution.
>>
>> I thought correctness always takes higher priority over performance.
>
> It is generally true. however, there are situations where we should make
> trade-offs between correctness and other factors (like performance):
>
> E.g., instructions without control bits, to be 100% compliant with CPU
> spec, in theory, VMMs can trap/decode every instruction and inject #UD
> if a guest tries to use some instructions it shouldn't.
This is the virtualization hole. IMHO, they are different things.
Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d
("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time
there was only a few bits defined, and the changelog called out that
No attempt is made to handle STIBP here, intentionally. Filtering
STIBP may be added in a future patch, which may require trapping all
writes if we don't want to pass it through directly to the guest.
Per my understanding, it implied that we need to re-visit it when more
bits are added instead of following the pass-through design silently.
On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote:
>On 5/16/2023 11:01 AM, Chao Gao wrote:
>> On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote:
>> > > > I think we need to fix this bug at first.
>> > >
>> > > I have no idea how to fix the "bug" without intercepting the MSR. The
>> > > performance penalty makes me think intercepting the MSR is not a viable
>> > > solution.
>> >
>> > I thought correctness always takes higher priority over performance.
>>
>> It is generally true. however, there are situations where we should make
>> trade-offs between correctness and other factors (like performance):
>>
>> E.g., instructions without control bits, to be 100% compliant with CPU
>> spec, in theory, VMMs can trap/decode every instruction and inject #UD
>> if a guest tries to use some instructions it shouldn't.
>
>This is the virtualization hole. IMHO, they are different things.
what are the differences between?
1. Executing some unsupported instructions should cause #UD. But this is allowed
in a KVM guest.
2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is
allowed in a KVM guest.
>
>Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d
>("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there
>was only a few bits defined, and the changelog called out that
>
> No attempt is made to handle STIBP here, intentionally. Filtering
> STIBP may be added in a future patch, which may require trapping all
> writes if we don't want to pass it through directly to the guest.
>
>Per my undesrstanding, it implied that we need to re-visit it when more bits
>added instead of following the pass-through design siliently.
I don't object to re-visiting the design. My point is that to prevent guests from
setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong
justification for intercepting the MSR. STIBP and other bits (except IBRS) have
the same problem. And the gain of fixing this is too small.
If passing through the SPEC_CTRL MSR to guests might cause security issues, I
would agree to intercept accesses to the MSR.
On 5/16/2023 5:09 PM, Chao Gao wrote:
> On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote:
>> On 5/16/2023 11:01 AM, Chao Gao wrote:
>>> On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote:
>>>>>> I think we need to fix this bug at first.
>>>>>
>>>>> I have no idea how to fix the "bug" without intercepting the MSR. The
>>>>> performance penalty makes me think intercepting the MSR is not a viable
>>>>> solution.
>>>>
>>>> I thought correctness always takes higher priority over performance.
>>>
>>> It is generally true. however, there are situations where we should make
>>> trade-offs between correctness and other factors (like performance):
>>>
>>> E.g., instructions without control bits, to be 100% compliant with CPU
>>> spec, in theory, VMMs can trap/decode every instruction and inject #UD
>>> if a guest tries to use some instructions it shouldn't.
>>
>> This is the virtualization hole. IMHO, they are different things.
>
> what are the differences between?
> 1. Executing some unsupported instructions should cause #UD. But this is allowed
> in a KVM guest.
> 2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is
> allowed in a KVM guest.
The difference is that for virtualization hole, there is no way but
intercept and decode every instruction if we want the correctness. It's
a disaster.
But for MSR virtualization, we do have an option and we don't need to
trap every instruction. MSR interception is the designated mechanism to
correctly and elegantly virtualize the MSR.
>>
>> Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d
>> ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there
>> was only a few bits defined, and the changelog called out that
>>
>> No attempt is made to handle STIBP here, intentionally. Filtering
>> STIBP may be added in a future patch, which may require trapping all
>> writes if we don't want to pass it through directly to the guest.
>>
>> Per my undesrstanding, it implied that we need to re-visit it when more bits
>> added instead of following the pass-through design siliently.
>
> I don't object to re-visiting the design. My point is that to prevent guests from
> setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong
> justfication for intercepting the MSR. STIBP and other bits (except IBRS) have
> the same problem. And the gain of fixing this is too small.
>
> If passing through the SPEC_CTRL MSR to guests might cause security issues, I
> would agree to intercept accesses to the MSR.
I never buy it. How to interpret the security? If the user wants to hide
one feature from guest but KVM allows it when KVM does have a reasonable
way to hide it. Does it violate the security?
On Thu, May 18, 2023 at 05:50:17PM +0800, Xiaoyao Li wrote:
>On 5/16/2023 5:09 PM, Chao Gao wrote:
>> On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote:
>> > On 5/16/2023 11:01 AM, Chao Gao wrote:
>> > > On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote:
>> > > > > > I think we need to fix this bug at first.
>> > > > >
>> > > > > I have no idea how to fix the "bug" without intercepting the MSR. The
>> > > > > performance penalty makes me think intercepting the MSR is not a viable
>> > > > > solution.
>> > > >
>> > > > I thought correctness always takes higher priority over performance.
>> > >
>> > > It is generally true. however, there are situations where we should make
>> > > trade-offs between correctness and other factors (like performance):
>> > >
>> > > E.g., instructions without control bits, to be 100% compliant with CPU
>> > > spec, in theory, VMMs can trap/decode every instruction and inject #UD
>> > > if a guest tries to use some instructions it shouldn't.
>> >
>> > This is the virtualization hole. IMHO, they are different things.
>>
>> what are the differences between?
>> 1. Executing some unsupported instructions should cause #UD. But this is allowed
>> in a KVM guest.
>> 2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is
>> allowed in a KVM guest.
>
>The difference is that for virtualization hole, there is no way but intercept
>and decode every instruction if we want the correctness. It's a disaster.
>
>But for MSR virtualization, we do have an option and we don't need to trap
>every instruction. MSR interception is the designated mechanism to correctly
>and elegantly virtualize the MSR.
The gains in this two cases are similar: some operations in guest are prevented.
But the costs on performance are not. So, how do you draw the line when we can
sacrifice correctness for performance?
>
>> >
>> > Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d
>> > ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there
>> > was only a few bits defined, and the changelog called out that
>> >
>> > No attempt is made to handle STIBP here, intentionally. Filtering
>> > STIBP may be added in a future patch, which may require trapping all
>> > writes if we don't want to pass it through directly to the guest.
>> >
>> > Per my undesrstanding, it implied that we need to re-visit it when more bits
>> > added instead of following the pass-through design siliently.
>>
>> I don't object to re-visiting the design. My point is that to prevent guests from
>> setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong
>> justfication for intercepting the MSR. STIBP and other bits (except IBRS) have
>> the same problem. And the gain of fixing this is too small.
>>
>> If passing through the SPEC_CTRL MSR to guests might cause security issues, I
>> would agree to intercept accesses to the MSR.
>
>I never buy it. How to interpret the security? If the user wants to hide one
>feature from guest but KVM allows it when KVM does have a reasonable way to
>hide it. Does it violate the security?
I would say no. By "security", I mean guest becomes vulnerable to some issues or
guest attacks host or guest can access unauthorized data.
I tried to say that if the value of intercepting IA32_SPEC_CTRL outweighs the
perform penalty, it makes sense to do that. I guess the decision has been made
when enabling STIBP and it means people care more about performance than
preventing guests from setting STIBP.