2023-09-14 11:00:40

by Yang Weijiang

[permalink] [raw]
Subject: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

Expose CET features to guest if KVM/host can support them, clear CPUID
feature bits if KVM/host cannot support.

Set CPUID feature bits so that CET features are available in guest CPUID.
Add CR4.CET bit support in order to allow guest set CET master control
bit.

Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
KVM does not support emulating CET.
Don't expose CET feature if either of {U,S}_CET xstate bits is cleared
in host XSS or if XSAVES isn't supported.

The CET load-bits in VM_ENTRY/VM_EXIT control fields should be set to make
guest CET xstates isolated from host's. And all platforms that support CET
enumerate VMX_BASIC[bit56] as 1, clear CET feature bits if the bit doesn't
read 1.

Regarding the CET MSR contents after Reset/INIT, SDM doesn't mention the
default values, neither can I get the answer internally so far, will fill
the gap once it's clear.

Signed-off-by: Yang Weijiang <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kvm/cpuid.c | 12 ++++++++++--
arch/x86/kvm/vmx/capabilities.h | 6 ++++++
arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++++++++++-
arch/x86/kvm/vmx/vmx.h | 6 ++++--
arch/x86/kvm/x86.c | 12 +++++++++++-
arch/x86/kvm/x86.h | 3 +++
8 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d77b030e996c..db0010fa3363 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -125,7 +125,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_CET))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d111350197f..1f8dc04da468 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1091,6 +1091,7 @@
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC 0x0100000000000000LLU

/* Resctrl MSRs: */
/* - Intel: */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 4e7a820cba62..d787a506746a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -654,7 +654,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC) | F(BUS_LOCK_DETECT)
+ F(SGX_LC) | F(BUS_LOCK_DETECT) | F(SHSTK)
);
/* Set LA57 based on hardware capability. */
if (cpuid_ecx(7) & F(LA57))
@@ -672,7 +672,8 @@ void kvm_set_cpu_caps(void)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) |
+ F(IBT)
);

/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -685,6 +686,13 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
+ /*
+ * The feature bit in boot_cpu_data.x86_capability could have been
+ * cleared due to ibt=off cmdline option, then add it back if CPU
+ * supports IBT.
+ */
+ if (cpuid_edx(7) & F(IBT))
+ kvm_cpu_cap_set(X86_FEATURE_IBT);

kvm_cpu_cap_mask(CPUID_7_1_EAX,
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index ee8938818c8a..e12bc233d88b 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -79,6 +79,12 @@ static inline bool cpu_has_vmx_basic_inout(void)
return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
}

+static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
+{
+ return ((u64)vmcs_config.basic_cap << 32) &
+ VMX_BASIC_NO_HW_ERROR_CODE_CC;
+}
+
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9ccc2c552f55..f0dea8ecd0c6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2614,6 +2614,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
};

memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -4934,6 +4935,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write64(GUEST_BNDCFGS, 0);

vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
+ vmcs_writel(GUEST_SSP, 0);
+ vmcs_writel(GUEST_S_CET, 0);
+ vmcs_writel(GUEST_INTR_SSP_TABLE, 0);

kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);

@@ -6354,6 +6358,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);

+ if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
+ pr_err("INTR SSP TABLE = 0x%016lx\n",
+ vmcs_readl(GUEST_INTR_SSP_TABLE));
+ }
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6431,6 +6441,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
pr_err("Virtual processor ID = 0x%04x\n",
vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
+ pr_err("INTR SSP TABLE = 0x%016lx\n",
+ vmcs_readl(HOST_INTR_SSP_TABLE));
+ }
}

/*
@@ -7967,7 +7983,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_UMIP);

/* CPUID 0xD.1 */
- kvm_caps.supported_xss = 0;
if (!cpu_has_vmx_xsaves())
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);

@@ -7979,6 +7994,12 @@ static __init void vmx_set_cpu_caps(void)

if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+ if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ !cpu_has_vmx_basic_no_hw_errcode()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
}

static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index c2130d2c8e24..fb72819fbb41 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -480,7 +480,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_CET_STATE)

#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -502,7 +503,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_LOAD_CET_STATE)

#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 231d4a7b6f3d..b7d1ac6b8d75 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -231,7 +231,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)

-#define KVM_SUPPORTED_XSS 0
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER | \
+ XFEATURE_MASK_CET_KERNEL)

u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
@@ -9699,6 +9700,15 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;

+ if ((kvm_caps.supported_xss & (XFEATURE_MASK_CET_USER |
+ XFEATURE_MASK_CET_KERNEL)) !=
+ (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_CET_USER;
+ kvm_caps.supported_xss &= ~XFEATURE_CET_KERNEL;
+ }
+
#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
#undef __kvm_cpu_cap_has
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 0d5f673338dd..665a7f91d04f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -530,6 +530,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_VMXE; \
if (!__cpu_has(__c, X86_FEATURE_PCID)) \
__reserved_bits |= X86_CR4_PCIDE; \
+ if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
+ !__cpu_has(__c, X86_FEATURE_IBT)) \
+ __reserved_bits |= X86_CR4_CET; \
__reserved_bits; \
})

--
2.27.0


2023-09-24 18:02:19

by Oliver Sang

[permalink] [raw]
Subject: Re: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace



Hello,

kernel test robot noticed "WARNING:at_arch/x86/kvm/vmx/vmx.c:#vmwrite_error[kvm_intel]" on:

commit: 68d0338a67df85ab18482295976e7bd873987165 ("[PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace")
url: https://github.com/intel-lab-lkp/linux/commits/Yang-Weijiang/x86-fpu-xstate-Manually-check-and-add-XFEATURE_CET_USER-xstate-bit/20230914-174056
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

in testcase: kvm-unit-tests-qemu
version:
with following parameters:




compiler: gcc-12
test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-lkp/[email protected]



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230924/[email protected]



[ 271.856711][T15436] ------------[ cut here ]------------
[ 271.863011][T15436] vmwrite failed: field=682a val=0 err=12
[ 271.869458][T15436] WARNING: CPU: 117 PID: 15436 at arch/x86/kvm/vmx/vmx.c:444 vmwrite_error+0x16b/0x2e0 [kvm_intel]
[ 271.880940][T15436] Modules linked in: kvm_intel kvm irqbypass btrfs blake2b_generic xor raid6_pq zstd_compress libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl intel_cstate ipmi_ssif ahci ast libahci mei_me drm_shmem_helper intel_uncore dax_hmem ioatdma joydev drm_kms_helper acpi_ipmi libata mei intel_pch_thermal dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad fuse drm ip_tables [last unloaded: irqbypass]
[ 271.939752][T15436] CPU: 117 PID: 15436 Comm: qemu-system-x86 Not tainted 6.5.0-12553-g68d0338a67df #1
[ 271.950090][T15436] RIP: 0010:vmwrite_error+0x16b/0x2e0 [kvm_intel]
[ 271.957256][T15436] Code: ff c6 05 f1 4b 82 ff 01 66 90 b9 00 44 00 00 0f 78 c9 0f 86 e0 00 00 00 48 89 ea 48 89 de 48 c7 c7 80 1c d9 c0 e8 c5 b7 c4 bf <0f> 0b e9 ae fe ff ff 48 c7 c0 a0 6f d9 c0 48 ba 00 00 00 00 00 fc
[ 271.978720][T15436] RSP: 0018:ffa000000e117980 EFLAGS: 00010286
[ 271.985599][T15436] RAX: 0000000000000000 RBX: 000000000000682a RCX: ffffffff82216eee
[ 271.994345][T15436] RDX: 1fe2200403fd57c8 RSI: 0000000000000008 RDI: ffa000000e117738
[ 272.003044][T15436] RBP: 0000000000000000 R08: 0000000000000001 R09: fff3fc0001c22ee7
[ 272.011865][T15436] R10: ffa000000e11773f R11: 0000000000000001 R12: ff110011b12a4b20
[ 272.020632][T15436] R13: 0000000000000000 R14: 0000000000000000 R15: ff110011b12a4980
[ 272.029340][T15436] FS: 00007f79fd975700(0000) GS:ff1100201fe80000(0000) knlGS:0000000000000000
[ 272.039141][T15436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 272.046484][T15436] CR2: 00007f79e8000010 CR3: 00000010d23c0003 CR4: 0000000000773ee0
[ 272.055167][T15436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 272.063980][T15436] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 272.072749][T15436] PKRU: 55555554
[ 272.076985][T15436] Call Trace:
[ 272.080947][T15436] <TASK>
[ 272.084650][T15436] ? __warn+0xcd/0x260
[ 272.089420][T15436] ? vmwrite_error+0x16b/0x2e0 [kvm_intel]
[ 272.096014][T15436] ? report_bug+0x267/0x2d0
[ 272.101163][T15436] ? handle_bug+0x3c/0x70
[ 272.106130][T15436] ? exc_invalid_op+0x17/0x40
[ 272.111483][T15436] ? asm_exc_invalid_op+0x1a/0x20
[ 272.117132][T15436] ? llist_add_batch+0xbe/0x130
[ 272.122685][T15436] ? vmwrite_error+0x16b/0x2e0 [kvm_intel]
[ 272.129113][T15436] vmx_vcpu_reset+0x2382/0x30b0 [kvm_intel]
[ 272.135741][T15436] ? init_vmcs+0x7230/0x7230 [kvm_intel]
[ 272.141988][T15436] ? irq_work_sync+0x8a/0x1f0
[ 272.147302][T15436] ? kvm_clear_async_pf_completion_queue+0x2e6/0x4c0 [kvm]
[ 272.155191][T15436] kvm_vcpu_reset+0x8cc/0x1080 [kvm]
[ 272.161154][T15436] kvm_arch_vcpu_create+0x8c5/0xbd0 [kvm]
[ 272.167584][T15436] kvm_vm_ioctl_create_vcpu+0x4be/0xe20 [kvm]
[ 272.174297][T15436] ? __alloc_pages+0x1d5/0x440
[ 272.179723][T15436] ? kvm_get_dirty_log_protect+0x5f0/0x5f0 [kvm]
[ 272.186757][T15436] ? __alloc_pages_slowpath+0x1cf0/0x1cf0
[ 272.194079][T15436] ? do_user_addr_fault+0x26c/0xac0
[ 272.199837][T15436] ? mem_cgroup_handle_over_high+0x570/0x570
[ 272.206405][T15436] ? _raw_spin_lock+0x85/0xe0
[ 272.211721][T15436] ? _raw_write_lock_irq+0xe0/0xe0
[ 272.217414][T15436] kvm_vm_ioctl+0x939/0xde0 [kvm]
[ 272.223014][T15436] ? __mod_memcg_lruvec_state+0x100/0x220
[ 272.229278][T15436] ? kvm_unregister_device_ops+0x90/0x90 [kvm]
[ 272.235978][T15436] ? __mod_lruvec_page_state+0x1ad/0x3a0
[ 272.242092][T15436] ? perf_trace_mm_lru_insertion+0x7c0/0x7c0
[ 272.248627][T15436] ? folio_batch_add_and_move+0xc1/0x110
[ 272.254832][T15436] ? do_anonymous_page+0x5e2/0xc10
[ 272.260431][T15436] ? up_write+0x52/0x90
[ 272.265006][T15436] ? vfs_fileattr_set+0x4e0/0x4e0
[ 272.270502][T15436] ? copy_page_range+0x880/0x880
[ 272.275831][T15436] ? __count_memcg_events+0xdd/0x1e0
[ 272.281564][T15436] ? handle_mm_fault+0x187/0x7a0
[ 272.286855][T15436] ? __fget_light+0x236/0x4d0
[ 272.291883][T15436] __x64_sys_ioctl+0x130/0x1a0
[ 272.296994][T15436] do_syscall_64+0x38/0x80
[ 272.301756][T15436] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 272.307993][T15436] RIP: 0033:0x7f79fe886237
[ 272.312758][T15436] Code: 00 00 00 48 8b 05 59 cc 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 29 cc 0d 00 f7 d8 64 89 01 48
[ 272.333241][T15436] RSP: 002b:00007f79fd974808 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 272.342024][T15436] RAX: ffffffffffffffda RBX: 000000000000ae41 RCX: 00007f79fe886237
[ 272.350428][T15436] RDX: 0000000000000000 RSI: 000000000000ae41 RDI: 000000000000000d
[ 272.358789][T15436] RBP: 00005606ece4cc90 R08: 0000000000000000 R09: 0000000000000000
[ 272.367151][T15436] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[ 272.375587][T15436] R13: 00007ffefe5a1daf R14: 00007f79fd974a80 R15: 0000000000802000
[ 272.383950][T15436] </TASK>
[ 272.387416][T15436] ---[ end trace 0000000000000000 ]---
[ 272.393295][T15436] kvm_intel: vmwrite failed: field=682a val=0 err=12



--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-09-25 03:27:11

by Yang Weijiang

[permalink] [raw]
Subject: Re: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace


It's due to lack of capability check, I will fix the calltrace in next verison.

On 9/24/2023 9:38 PM, kernel test robot wrote:
>
> Hello,
>
> kernel test robot noticed "WARNING:at_arch/x86/kvm/vmx/vmx.c:#vmwrite_error[kvm_intel]" on:
>
> commit: 68d0338a67df85ab18482295976e7bd873987165 ("[PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace")
> url: https://github.com/intel-lab-lkp/linux/commits/Yang-Weijiang/x86-fpu-xstate-Manually-check-and-add-XFEATURE_CET_USER-xstate-bit/20230914-174056
> patch link: https://lore.kernel.org/all/[email protected]/
> patch subject: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace
>
> in testcase: kvm-unit-tests-qemu
> version:
> with following parameters:
>
>
>
>
> compiler: gcc-12
> test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory
>
> (please refer to attached dmesg/kmsg for entire log/backtrace)
>
>
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <[email protected]>
> | Closes: https://lore.kernel.org/oe-lkp/[email protected]
>
>
>
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20230924/[email protected]
>
>
>
> [ 271.856711][T15436] ------------[ cut here ]------------
> [ 271.863011][T15436] vmwrite failed: field=682a val=0 err=12
> [ 271.869458][T15436] WARNING: CPU: 117 PID: 15436 at arch/x86/kvm/vmx/vmx.c:444 vmwrite_error+0x16b/0x2e0 [kvm_intel]
> [ 271.880940][T15436] Modules linked in: kvm_intel kvm irqbypass btrfs blake2b_generic xor raid6_pq zstd_compress libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl intel_cstate ipmi_ssif ahci ast libahci mei_me drm_shmem_helper intel_uncore dax_hmem ioatdma joydev drm_kms_helper acpi_ipmi libata mei intel_pch_thermal dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad fuse drm ip_tables [last unloaded: irqbypass]
> [ 271.939752][T15436] CPU: 117 PID: 15436 Comm: qemu-system-x86 Not tainted 6.5.0-12553-g68d0338a67df #1
> [ 271.950090][T15436] RIP: 0010:vmwrite_error+0x16b/0x2e0 [kvm_intel]
> [ 271.957256][T15436] Code: ff c6 05 f1 4b 82 ff 01 66 90 b9 00 44 00 00 0f 78 c9 0f 86 e0 00 00 00 48 89 ea 48 89 de 48 c7 c7 80 1c d9 c0 e8 c5 b7 c4 bf <0f> 0b e9 ae fe ff ff 48 c7 c0 a0 6f d9 c0 48 ba 00 00 00 00 00 fc
> [ 271.978720][T15436] RSP: 0018:ffa000000e117980 EFLAGS: 00010286
> [ 271.985599][T15436] RAX: 0000000000000000 RBX: 000000000000682a RCX: ffffffff82216eee
> [ 271.994345][T15436] RDX: 1fe2200403fd57c8 RSI: 0000000000000008 RDI: ffa000000e117738
> [ 272.003044][T15436] RBP: 0000000000000000 R08: 0000000000000001 R09: fff3fc0001c22ee7
> [ 272.011865][T15436] R10: ffa000000e11773f R11: 0000000000000001 R12: ff110011b12a4b20
> [ 272.020632][T15436] R13: 0000000000000000 R14: 0000000000000000 R15: ff110011b12a4980
> [ 272.029340][T15436] FS: 00007f79fd975700(0000) GS:ff1100201fe80000(0000) knlGS:0000000000000000
> [ 272.039141][T15436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 272.046484][T15436] CR2: 00007f79e8000010 CR3: 00000010d23c0003 CR4: 0000000000773ee0
> [ 272.055167][T15436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 272.063980][T15436] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 272.072749][T15436] PKRU: 55555554
> [ 272.076985][T15436] Call Trace:
> [ 272.080947][T15436] <TASK>
> [ 272.084650][T15436] ? __warn+0xcd/0x260
> [ 272.089420][T15436] ? vmwrite_error+0x16b/0x2e0 [kvm_intel]
> [ 272.096014][T15436] ? report_bug+0x267/0x2d0
> [ 272.101163][T15436] ? handle_bug+0x3c/0x70
> [ 272.106130][T15436] ? exc_invalid_op+0x17/0x40
> [ 272.111483][T15436] ? asm_exc_invalid_op+0x1a/0x20
> [ 272.117132][T15436] ? llist_add_batch+0xbe/0x130
> [ 272.122685][T15436] ? vmwrite_error+0x16b/0x2e0 [kvm_intel]
> [ 272.129113][T15436] vmx_vcpu_reset+0x2382/0x30b0 [kvm_intel]
> [ 272.135741][T15436] ? init_vmcs+0x7230/0x7230 [kvm_intel]
> [ 272.141988][T15436] ? irq_work_sync+0x8a/0x1f0
> [ 272.147302][T15436] ? kvm_clear_async_pf_completion_queue+0x2e6/0x4c0 [kvm]
> [ 272.155191][T15436] kvm_vcpu_reset+0x8cc/0x1080 [kvm]
> [ 272.161154][T15436] kvm_arch_vcpu_create+0x8c5/0xbd0 [kvm]
> [ 272.167584][T15436] kvm_vm_ioctl_create_vcpu+0x4be/0xe20 [kvm]
> [ 272.174297][T15436] ? __alloc_pages+0x1d5/0x440
> [ 272.179723][T15436] ? kvm_get_dirty_log_protect+0x5f0/0x5f0 [kvm]
> [ 272.186757][T15436] ? __alloc_pages_slowpath+0x1cf0/0x1cf0
> [ 272.194079][T15436] ? do_user_addr_fault+0x26c/0xac0
> [ 272.199837][T15436] ? mem_cgroup_handle_over_high+0x570/0x570
> [ 272.206405][T15436] ? _raw_spin_lock+0x85/0xe0
> [ 272.211721][T15436] ? _raw_write_lock_irq+0xe0/0xe0
> [ 272.217414][T15436] kvm_vm_ioctl+0x939/0xde0 [kvm]
> [ 272.223014][T15436] ? __mod_memcg_lruvec_state+0x100/0x220
> [ 272.229278][T15436] ? kvm_unregister_device_ops+0x90/0x90 [kvm]
> [ 272.235978][T15436] ? __mod_lruvec_page_state+0x1ad/0x3a0
> [ 272.242092][T15436] ? perf_trace_mm_lru_insertion+0x7c0/0x7c0
> [ 272.248627][T15436] ? folio_batch_add_and_move+0xc1/0x110
> [ 272.254832][T15436] ? do_anonymous_page+0x5e2/0xc10
> [ 272.260431][T15436] ? up_write+0x52/0x90
> [ 272.265006][T15436] ? vfs_fileattr_set+0x4e0/0x4e0
> [ 272.270502][T15436] ? copy_page_range+0x880/0x880
> [ 272.275831][T15436] ? __count_memcg_events+0xdd/0x1e0
> [ 272.281564][T15436] ? handle_mm_fault+0x187/0x7a0
> [ 272.286855][T15436] ? __fget_light+0x236/0x4d0
> [ 272.291883][T15436] __x64_sys_ioctl+0x130/0x1a0
> [ 272.296994][T15436] do_syscall_64+0x38/0x80
> [ 272.301756][T15436] entry_SYSCALL_64_after_hwframe+0x63/0xcd
> [ 272.307993][T15436] RIP: 0033:0x7f79fe886237
> [ 272.312758][T15436] Code: 00 00 00 48 8b 05 59 cc 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 29 cc 0d 00 f7 d8 64 89 01 48
> [ 272.333241][T15436] RSP: 002b:00007f79fd974808 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [ 272.342024][T15436] RAX: ffffffffffffffda RBX: 000000000000ae41 RCX: 00007f79fe886237
> [ 272.350428][T15436] RDX: 0000000000000000 RSI: 000000000000ae41 RDI: 000000000000000d
> [ 272.358789][T15436] RBP: 00005606ece4cc90 R08: 0000000000000000 R09: 0000000000000000
> [ 272.367151][T15436] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> [ 272.375587][T15436] R13: 00007ffefe5a1daf R14: 00007f79fd974a80 R15: 0000000000802000
> [ 272.383950][T15436] </TASK>
> [ 272.387416][T15436] ---[ end trace 0000000000000000 ]---
> [ 272.393295][T15436] kvm_intel: vmwrite failed: field=682a val=0 err=12
>
>
>

2023-10-31 17:58:10

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> Expose CET features to guest if KVM/host can support them, clear CPUID
> feature bits if KVM/host cannot support.
>
> Set CPUID feature bits so that CET features are available in guest CPUID.
> Add CR4.CET bit support in order to allow guest set CET master control
> bit.
>
> Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
> KVM does not support emulating CET.
> Don't expose CET feature if either of {U,S}_CET xstate bits is cleared
> in host XSS or if XSAVES isn't supported.
>
> The CET load-bits in VM_ENTRY/VM_EXIT control fields should be set to make
> guest CET xstates isolated from host's. And all platforms that support CET
> enumerate VMX_BASIC[bit56] as 1, clear CET feature bits if the bit doesn't
> read 1.
>
> Regarding the CET MSR contents after Reset/INIT, SDM doesn't mention the
> default values, neither can I get the answer internally so far, will fill
> the gap once it's clear.
>
> Signed-off-by: Yang Weijiang <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 3 ++-
> arch/x86/include/asm/msr-index.h | 1 +
> arch/x86/kvm/cpuid.c | 12 ++++++++++--
> arch/x86/kvm/vmx/capabilities.h | 6 ++++++
> arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++++++++++-
> arch/x86/kvm/vmx/vmx.h | 6 ++++--
> arch/x86/kvm/x86.c | 12 +++++++++++-
> arch/x86/kvm/x86.h | 3 +++
> 8 files changed, 59 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d77b030e996c..db0010fa3363 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -125,7 +125,8 @@
> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> - | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> + | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> + | X86_CR4_CET))
>
> #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 1d111350197f..1f8dc04da468 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -1091,6 +1091,7 @@
> #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
> #define VMX_BASIC_MEM_TYPE_WB 6LLU
> #define VMX_BASIC_INOUT 0x0040000000000000LLU
> +#define VMX_BASIC_NO_HW_ERROR_CODE_CC 0x0100000000000000LLU
>
> /* Resctrl MSRs: */
> /* - Intel: */
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 4e7a820cba62..d787a506746a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -654,7 +654,7 @@ void kvm_set_cpu_caps(void)
> F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
> F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
> - F(SGX_LC) | F(BUS_LOCK_DETECT)
> + F(SGX_LC) | F(BUS_LOCK_DETECT) | F(SHSTK)
> );
> /* Set LA57 based on hardware capability. */
> if (cpuid_ecx(7) & F(LA57))
> @@ -672,7 +672,8 @@ void kvm_set_cpu_caps(void)
> F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
> F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
> - F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
> + F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) |
> + F(IBT)
> );
>
> /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
> @@ -685,6 +686,13 @@ void kvm_set_cpu_caps(void)
> kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
> if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
> kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
> + /*
> + * The feature bit in boot_cpu_data.x86_capability could have been
> + * cleared due to ibt=off cmdline option, then add it back if CPU
> + * supports IBT.
> + */
> + if (cpuid_edx(7) & F(IBT))
> + kvm_cpu_cap_set(X86_FEATURE_IBT);

The usual policy is that when the host doesn't support a feature, then the guest
should not support it either. On the other hand, for this particular feature,
it is probably safe to use it. Just a point for a discussion.

>
> kvm_cpu_cap_mask(CPUID_7_1_EAX,
> F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index ee8938818c8a..e12bc233d88b 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -79,6 +79,12 @@ static inline bool cpu_has_vmx_basic_inout(void)
> return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
> }
>
> +static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
> +{
> + return ((u64)vmcs_config.basic_cap << 32) &
> + VMX_BASIC_NO_HW_ERROR_CODE_CC;
> +}

I see, this is because #CP does have an error code but when bit 56 of IA32_VMX_BASIC,
is clear then error code must be present iff exception is within a hardcoded list of exceptions,
and #CP is not on this list because back then the #CP didn't exist, and all new CPUs do
have this bit 56 set.

But I am not 100% sure that this check is worth it, I don't mind having it though,
but please add a comment explaining why the bit 56 is needed for CET.

> +
> static inline bool cpu_has_virtual_nmis(void)
> {
> return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9ccc2c552f55..f0dea8ecd0c6 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2614,6 +2614,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
> { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
> { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
> + { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
> };
>
> memset(vmcs_conf, 0, sizeof(*vmcs_conf));
> @@ -4934,6 +4935,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> vmcs_write64(GUEST_BNDCFGS, 0);
>
> vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */


I guess that the below 3 writes should only be done if CET is supported,
this is what the kernel test robot is complaining about.


> + vmcs_writel(GUEST_SSP, 0);
> + vmcs_writel(GUEST_S_CET, 0);
> + vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
>
> kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
>
> @@ -6354,6 +6358,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
> vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
>
> + if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
> + pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
> + pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
> + pr_err("INTR SSP TABLE = 0x%016lx\n",
> + vmcs_readl(GUEST_INTR_SSP_TABLE));
> + }
> pr_err("*** Host State ***\n");
> pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
> vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
> @@ -6431,6 +6441,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
> pr_err("Virtual processor ID = 0x%04x\n",
> vmcs_read16(VIRTUAL_PROCESSOR_ID));
> + if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
> + pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
> + pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
> + pr_err("INTR SSP TABLE = 0x%016lx\n",
> + vmcs_readl(HOST_INTR_SSP_TABLE));
> + }
> }
>
> /*
> @@ -7967,7 +7983,6 @@ static __init void vmx_set_cpu_caps(void)
> kvm_cpu_cap_set(X86_FEATURE_UMIP);
>
> /* CPUID 0xD.1 */
> - kvm_caps.supported_xss = 0;
> if (!cpu_has_vmx_xsaves())
> kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
>
> @@ -7979,6 +7994,12 @@ static __init void vmx_set_cpu_caps(void)
>
> if (cpu_has_vmx_waitpkg())
> kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
> +
> + if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
> + !cpu_has_vmx_basic_no_hw_errcode()) {
> + kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
> + kvm_cpu_cap_clear(X86_FEATURE_IBT);

I think that here we also need to clear kvm_caps.supported_xss,
or even better, lets set the CET bits in kvm_caps.supported_xss only
once CET is fully enabled (both this check and check in __kvm_x86_vendor_init pass).

> + }
> }
>
> static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index c2130d2c8e24..fb72819fbb41 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -480,7 +480,8 @@ static inline u8 vmx_get_rvi(void)
> VM_ENTRY_LOAD_IA32_EFER | \
> VM_ENTRY_LOAD_BNDCFGS | \
> VM_ENTRY_PT_CONCEAL_PIP | \
> - VM_ENTRY_LOAD_IA32_RTIT_CTL)
> + VM_ENTRY_LOAD_IA32_RTIT_CTL | \
> + VM_ENTRY_LOAD_CET_STATE)
>
> #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
> (VM_EXIT_SAVE_DEBUG_CONTROLS | \
> @@ -502,7 +503,8 @@ static inline u8 vmx_get_rvi(void)
> VM_EXIT_LOAD_IA32_EFER | \
> VM_EXIT_CLEAR_BNDCFGS | \
> VM_EXIT_PT_CONCEAL_PIP | \
> - VM_EXIT_CLEAR_IA32_RTIT_CTL)
> + VM_EXIT_CLEAR_IA32_RTIT_CTL | \
> + VM_EXIT_LOAD_CET_STATE)
>
> #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
> (PIN_BASED_EXT_INTR_MASK | \
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 231d4a7b6f3d..b7d1ac6b8d75 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -231,7 +231,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
> | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
> | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
>
> -#define KVM_SUPPORTED_XSS 0
> +#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER | \
> + XFEATURE_MASK_CET_KERNEL)
>
> u64 __read_mostly host_efer;
> EXPORT_SYMBOL_GPL(host_efer);
> @@ -9699,6 +9700,15 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> kvm_caps.supported_xss = 0;
>
> + if ((kvm_caps.supported_xss & (XFEATURE_MASK_CET_USER |
> + XFEATURE_MASK_CET_KERNEL)) !=
> + (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)) {
> + kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
> + kvm_cpu_cap_clear(X86_FEATURE_IBT);
> + kvm_caps.supported_xss &= ~XFEATURE_CET_USER;
> + kvm_caps.supported_xss &= ~XFEATURE_CET_KERNEL;
> + }
> +
> #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
> cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
> #undef __kvm_cpu_cap_has
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 0d5f673338dd..665a7f91d04f 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -530,6 +530,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
> __reserved_bits |= X86_CR4_VMXE; \
> if (!__cpu_has(__c, X86_FEATURE_PCID)) \
> __reserved_bits |= X86_CR4_PCIDE; \
> + if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
> + !__cpu_has(__c, X86_FEATURE_IBT)) \
> + __reserved_bits |= X86_CR4_CET; \
> __reserved_bits; \
> })
>


Best regards,
Maxim Levitsky






2023-11-01 22:15:49

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH v6 23/25] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > @@ -685,6 +686,13 @@ void kvm_set_cpu_caps(void)
> > kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
> > if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
> > kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
> > + /*
> > + * The feature bit in boot_cpu_data.x86_capability could have been
> > + * cleared due to ibt=off cmdline option, then add it back if CPU
> > + * supports IBT.
> > + */
> > + if (cpuid_edx(7) & F(IBT))
> > + kvm_cpu_cap_set(X86_FEATURE_IBT);
>
> The usual policy is that when the host doesn't support a feature, then the guest
> should not support it either. On the other hand, for this particular feature,
> it is probably safe to use it. Just a point for a discussion.

Agreed, this needs extra justification. It's "safe" in theory, but if the admin
disabled IBT because of a ucode bug, then all bets are off.

I'm guessing this was added because of the virtualization hole? I.e. if KVM
allows CR4.CET=1 for shadow stacks, then KVM can't (easily?) prevent the guest
from also using IBT.