The VM entry/exit path is a giant inline assembly statement. Simplify it
by doing the CR2 context switching in plain C. Move the guest CR2 save on
VM exit behind IBRS clearing, so we reduce the amount of code we execute
with IBRS on.
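For illustration, the resulting flow in vmx_vcpu_run() looks roughly like
this (simplified sketch based on the diff below; unrelated code and error
handling omitted):

	/* Restore the guest's CR2 before entering the guest. */
	if (read_cr2() != vcpu->arch.cr2)
		write_cr2(vcpu->arch.cr2);

	/* ... VMLAUNCH/VMRESUME via the big inline assembly statement ... */

	/* ... SPEC_CTRL save, IBRS clearing, RSB fill ... */
	vmexit_fill_RSB();

	/* Save the guest's CR2 only once IBRS has been cleared. */
	vcpu->arch.cr2 = read_cr2();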
Signed-off-by: Julian Stecklina <[email protected]>
Reviewed-by: Jan H. Schönherr <[email protected]>
Reviewed-by: Konrad Jan Miller <[email protected]>
---
arch/x86/kvm/vmx.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e665aa7..93562d5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10728,6 +10728,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
(unsigned long)&current_evmcs->host_rsp : 0;
+ if (read_cr2() != vcpu->arch.cr2)
+ write_cr2(vcpu->arch.cr2);
+
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
@@ -10747,13 +10750,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"2: \n\t"
__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
"1: \n\t"
- /* Reload cr2 if changed */
- "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
- "mov %%cr2, %%" _ASM_DX " \n\t"
- "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
- "je 3f \n\t"
- "mov %%" _ASM_AX", %%cr2 \n\t"
- "3: \n\t"
/* Check if vmlaunch of vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
/* Load guest registers. Don't clobber flags. */
@@ -10810,8 +10806,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"xor %%r14d, %%r14d \n\t"
"xor %%r15d, %%r15d \n\t"
#endif
- "mov %%cr2, %%" _ASM_AX " \n\t"
- "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
"xor %%eax, %%eax \n\t"
"xor %%ebx, %%ebx \n\t"
@@ -10843,7 +10837,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
- [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
[wordsize]"i"(sizeof(ulong))
: "cc", "memory"
#ifdef CONFIG_X86_64
@@ -10877,6 +10870,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
+ vcpu->arch.cr2 = read_cr2();
+
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
current_evmcs->hv_clean_fields |=
--
2.7.4
So far the VMX code relied on manually assembled VMX instructions. This
was apparently done to ensure compatibility with old binutils. VMX
instructions were introduced with binutils 2.19 and the kernel currently
requires binutils 2.20.
Remove the manually assembled versions and replace them with the proper
inline assembly. This improves code generation (and source code
readability).
According to the bloat-o-meter this change removes ~1300 bytes from the
text segment.
Signed-off-by: Julian Stecklina <[email protected]>
Reviewed-by: Jan H. Schönherr <[email protected]>
Reviewed-by: Konrad Jan Miller <[email protected]>
Reviewed-by: Razvan-Alin Ghitulete <[email protected]>
---
arch/x86/include/asm/virtext.h | 2 +-
arch/x86/include/asm/vmx.h | 13 -------------
arch/x86/kvm/vmx.c | 39 ++++++++++++++++++---------------------
3 files changed, 19 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 0116b2e..c5395b3 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void)
*/
static inline void cpu_vmxoff(void)
{
- asm volatile (ASM_VMX_VMXOFF : : : "cc");
+ asm volatile ("vmxoff" : : : "cc");
cr4_clear_bits(X86_CR4_VMXE);
}
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9527ba5..ade0f15 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -503,19 +503,6 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
-
-#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
struct vmx_msr_entry {
u32 index;
u32 reserved;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 82cfb909..bbbdccb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2077,7 +2077,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
return -1;
}
-static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+static inline void __invvpid(long ext, u16 vpid, gva_t gva)
{
struct {
u64 vpid : 16;
@@ -2086,21 +2086,21 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
} operand = { vpid, 0, gva };
bool error;
- asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
+ asm volatile ("invvpid %1, %2" CC_SET(na)
+ : CC_OUT(na) (error) : "m"(operand), "r"(ext)
: "memory");
BUG_ON(error);
}
-static inline void __invept(int ext, u64 eptp, gpa_t gpa)
+static inline void __invept(long ext, u64 eptp, gpa_t gpa)
{
struct {
u64 eptp, gpa;
} operand = {eptp, gpa};
bool error;
- asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
- : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
+ asm volatile ("invept %1, %2" CC_SET(na)
+ : CC_OUT(na) (error) : "m" (operand), "r" (ext)
: "memory");
BUG_ON(error);
}
@@ -2120,8 +2120,8 @@ static void vmcs_clear(struct vmcs *vmcs)
u64 phys_addr = __pa(vmcs);
bool error;
- asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+ asm volatile ("vmclear %1" CC_SET(na)
+ : CC_OUT(na) (error) : "m"(phys_addr)
: "memory");
if (unlikely(error))
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
@@ -2145,8 +2145,8 @@ static void vmcs_load(struct vmcs *vmcs)
if (static_branch_unlikely(&enable_evmcs))
return evmcs_load(phys_addr);
- asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+ asm volatile ("vmptrld %1" CC_SET(na)
+ : CC_OUT(na) (error) : "m"(phys_addr)
: "memory");
if (unlikely(error))
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
@@ -2323,8 +2323,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
{
unsigned long value;
- asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
- : "=a"(value) : "d"(field) : "cc");
+ asm volatile ("vmread %1, %0" : "=rm"(value) : "r"(field) : "cc");
return value;
}
@@ -2375,8 +2374,8 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
{
bool error;
- asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(value), "d"(field));
+ asm volatile ("vmwrite %1, %2" CC_SET(na)
+ : CC_OUT(na) (error) : "rm"(value), "r"(field));
if (unlikely(error))
vmwrite_error(field, value);
}
@@ -4397,9 +4396,7 @@ static void kvm_cpu_vmxon(u64 addr)
cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
- asm volatile (ASM_VMX_VMXON_RAX
- : : "a"(&addr), "m"(addr)
- : "memory", "cc");
+ asm volatile ("vmxon %0" : : "m"(addr) : "memory", "cc");
}
static int hardware_enable(void)
@@ -4468,7 +4465,7 @@ static void vmclear_local_loaded_vmcss(void)
*/
static void kvm_cpu_vmxoff(void)
{
- asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+ asm volatile ("vmxoff" : : : "cc");
intel_pt_handle_vmx(0);
cr4_clear_bits(X86_CR4_VMXE);
@@ -10748,7 +10745,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
"jmp 1f \n\t"
"2: \n\t"
- __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
+ "vmwrite %%" _ASM_SP ", %%" _ASM_DX "\n\t"
"1: \n\t"
/* Check if vmlaunch of vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
@@ -10773,9 +10770,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Enter guest mode */
"jne 1f \n\t"
- __ex(ASM_VMX_VMLAUNCH) "\n\t"
+ "vmlaunch \n\t"
"jmp 2f \n\t"
- "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+ "1: vmresume \n\t"
"2: "
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
--
2.7.4
The code violated the kernel coding style by indenting with spaces. Fix
this by using tabs instead. This is a whitespace-only change.
Signed-off-by: Julian Stecklina <[email protected]>
Reviewed-by: Jan H. Schönherr <[email protected]>
Reviewed-by: Konrad Jan Miller <[email protected]>
---
arch/x86/kvm/vmx.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9225099..82cfb909 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2079,17 +2079,17 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
static inline void __invvpid(int ext, u16 vpid, gva_t gva)
{
- struct {
- u64 vpid : 16;
- u64 rsvd : 48;
- u64 gva;
- } operand = { vpid, 0, gva };
- bool error;
-
- asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
- : "memory");
- BUG_ON(error);
+ struct {
+ u64 vpid : 16;
+ u64 rsvd : 48;
+ u64 gva;
+ } operand = { vpid, 0, gva };
+ bool error;
+
+ asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
+ : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
+ : "memory");
+ BUG_ON(error);
}
static inline void __invept(int ext, u64 eptp, gpa_t gpa)
--
2.7.4
Split the security related register clearing out of the large inline
assembly VM entry path. This results in two slightly less complicated
inline assembly statements, where it is clearer what each one does.
Signed-off-by: Julian Stecklina <[email protected]>
Reviewed-by: Jan H. Schönherr <[email protected]>
Reviewed-by: Konrad Jan Miller <[email protected]>
---
arch/x86/kvm/vmx.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 93562d5..9225099 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10797,20 +10797,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
- "xor %%r8d, %%r8d \n\t"
- "xor %%r9d, %%r9d \n\t"
- "xor %%r10d, %%r10d \n\t"
- "xor %%r11d, %%r11d \n\t"
- "xor %%r12d, %%r12d \n\t"
- "xor %%r13d, %%r13d \n\t"
- "xor %%r14d, %%r14d \n\t"
- "xor %%r15d, %%r15d \n\t"
#endif
-
- "xor %%eax, %%eax \n\t"
- "xor %%ebx, %%ebx \n\t"
- "xor %%esi, %%esi \n\t"
- "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
@@ -10847,6 +10834,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Don't let guest register values survive. */
+ asm volatile (
+ ""
+#ifdef CONFIG_X86_64
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
+#endif
+ :: "a" (0), "b" (0), "S" (0), "D" (0)
+ : "cc"
+#ifdef CONFIG_X86_64
+ , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#endif
+ );
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
--
2.7.4
On Wed, Oct 24, 2018 at 1:30 AM Julian Stecklina <[email protected]> wrote:
>
> So far the VMX code relied on manually assembled VMX instructions. This
> was apparently done to ensure compatibility with old binutils. VMX
> instructions were introduced with binutils 2.19 and the kernel currently
> requires binutils 2.20.
>
> Remove the manually assembled versions and replace them with the proper
> inline assembly. This improves code generation (and source code
> readability).
>
> According to the bloat-o-meter this change removes ~1300 bytes from the
> text segment.
This loses the exception handling from __ex* ->
____kvm_handle_fault_on_reboot.
If deliberate, this should be called out in changelog. Has the race
which commit 4ecac3fd fixed been mitigated otherwise?
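For reference, the __ex() wrapper expands to a fault-on-reboot fixup roughly
along these lines (simplified sketch; the exact macro lives in
arch/x86/include/asm/kvm_host.h and differs in detail):

	/*
	 * Sketch of ____kvm_handle_fault_on_reboot(): a fault on the wrapped
	 * instruction (e.g. because VMX was already turned off during an
	 * emergency reboot) is routed via the exception table to a fixup
	 * that tolerates the fault if kvm_rebooting is set.
	 */
	asm volatile("666:	vmxoff\n\t"
		     "668:\n\t"
		     ".pushsection .fixup, \"ax\"\n\t"
		     "667:	cmpb $0, kvm_rebooting\n\t"
		     "jne 668b\n\t"
		     "call kvm_spurious_fault\n\t"
		     ".popsection\n\t"
		     _ASM_EXTABLE(666b, 667b));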
>
> Signed-off-by: Julian Stecklina <[email protected]>
> Reviewed-by: Jan H. Schönherr <[email protected]>
> Reviewed-by: Konrad Jan Miller <[email protected]>
> Reviewed-by: Razvan-Alin Ghitulete <[email protected]>
> ---
> arch/x86/include/asm/virtext.h | 2 +-
> arch/x86/include/asm/vmx.h | 13 -------------
> arch/x86/kvm/vmx.c | 39 ++++++++++++++++++---------------------
> 3 files changed, 19 insertions(+), 35 deletions(-)
>
> diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
> index 0116b2e..c5395b3 100644
> --- a/arch/x86/include/asm/virtext.h
> +++ b/arch/x86/include/asm/virtext.h
> @@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void)
> */
> static inline void cpu_vmxoff(void)
> {
> - asm volatile (ASM_VMX_VMXOFF : : : "cc");
> + asm volatile ("vmxoff" : : : "cc");
> cr4_clear_bits(X86_CR4_VMXE);
> }
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 9527ba5..ade0f15 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -503,19 +503,6 @@ enum vmcs_field {
>
> #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
>
> -
> -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
> -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
> -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
> -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
> -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
> -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
> -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
> -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
> -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
> -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
> -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
> -
> struct vmx_msr_entry {
> u32 index;
> u32 reserved;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 82cfb909..bbbdccb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2077,7 +2077,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
> return -1;
> }
>
> -static inline void __invvpid(int ext, u16 vpid, gva_t gva)
> +static inline void __invvpid(long ext, u16 vpid, gva_t gva)
> {
> struct {
> u64 vpid : 16;
> @@ -2086,21 +2086,21 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
> } operand = { vpid, 0, gva };
> bool error;
>
> - asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
> - : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
> + asm volatile ("invvpid %1, %2" CC_SET(na)
> + : CC_OUT(na) (error) : "m"(operand), "r"(ext)
> : "memory");
> BUG_ON(error);
> }
>
> -static inline void __invept(int ext, u64 eptp, gpa_t gpa)
> +static inline void __invept(long ext, u64 eptp, gpa_t gpa)
> {
> struct {
> u64 eptp, gpa;
> } operand = {eptp, gpa};
> bool error;
>
> - asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
> - : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
> + asm volatile ("invept %1, %2" CC_SET(na)
> + : CC_OUT(na) (error) : "m" (operand), "r" (ext)
> : "memory");
> BUG_ON(error);
> }
> @@ -2120,8 +2120,8 @@ static void vmcs_clear(struct vmcs *vmcs)
> u64 phys_addr = __pa(vmcs);
> bool error;
>
> - asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
> - : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
> + asm volatile ("vmclear %1" CC_SET(na)
> + : CC_OUT(na) (error) : "m"(phys_addr)
> : "memory");
> if (unlikely(error))
> printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
> @@ -2145,8 +2145,8 @@ static void vmcs_load(struct vmcs *vmcs)
> if (static_branch_unlikely(&enable_evmcs))
> return evmcs_load(phys_addr);
>
> - asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
> - : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
> + asm volatile ("vmptrld %1" CC_SET(na)
> + : CC_OUT(na) (error) : "m"(phys_addr)
> : "memory");
> if (unlikely(error))
> printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
> @@ -2323,8 +2323,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
> {
> unsigned long value;
>
> - asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
> - : "=a"(value) : "d"(field) : "cc");
> + asm volatile ("vmread %1, %0" : "=rm"(value) : "r"(field) : "cc");
> return value;
> }
>
> @@ -2375,8 +2374,8 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
> {
> bool error;
>
> - asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
> - : CC_OUT(na) (error) : "a"(value), "d"(field));
> + asm volatile ("vmwrite %1, %2" CC_SET(na)
> + : CC_OUT(na) (error) : "rm"(value), "r"(field));
> if (unlikely(error))
> vmwrite_error(field, value);
> }
> @@ -4397,9 +4396,7 @@ static void kvm_cpu_vmxon(u64 addr)
> cr4_set_bits(X86_CR4_VMXE);
> intel_pt_handle_vmx(1);
>
> - asm volatile (ASM_VMX_VMXON_RAX
> - : : "a"(&addr), "m"(addr)
> - : "memory", "cc");
> + asm volatile ("vmxon %0" : : "m"(addr) : "memory", "cc");
> }
>
> static int hardware_enable(void)
> @@ -4468,7 +4465,7 @@ static void vmclear_local_loaded_vmcss(void)
> */
> static void kvm_cpu_vmxoff(void)
> {
> - asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> + asm volatile ("vmxoff" : : : "cc");
>
> intel_pt_handle_vmx(0);
> cr4_clear_bits(X86_CR4_VMXE);
> @@ -10748,7 +10745,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
> "jmp 1f \n\t"
> "2: \n\t"
> - __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
> + "vmwrite %%" _ASM_SP ", %%" _ASM_DX "\n\t"
> "1: \n\t"
> /* Check if vmlaunch of vmresume is needed */
> "cmpl $0, %c[launched](%0) \n\t"
> @@ -10773,9 +10770,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> /* Enter guest mode */
> "jne 1f \n\t"
> - __ex(ASM_VMX_VMLAUNCH) "\n\t"
> + "vmlaunch \n\t"
> "jmp 2f \n\t"
> - "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
> + "1: vmresume \n\t"
> "2: "
> /* Save guest registers, load host registers, keep flags */
> "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
> --
> 2.7.4
>
On Wed, Oct 24, 2018 at 1:28 AM, Julian Stecklina <[email protected]> wrote:
> Split the security related register clearing out of the large inline
> assembly VM entry path. This results in two slightly less complicated
> inline assembly statements, where it is clearer what each one does.
>
> Signed-off-by: Julian Stecklina <[email protected]>
> Reviewed-by: Jan H. Schönherr <[email protected]>
> Reviewed-by: Konrad Jan Miller <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 33 ++++++++++++++++++++-------------
> 1 file changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 93562d5..9225099 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10797,20 +10797,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> "mov %%r13, %c[r13](%0) \n\t"
> "mov %%r14, %c[r14](%0) \n\t"
> "mov %%r15, %c[r15](%0) \n\t"
> - "xor %%r8d, %%r8d \n\t"
> - "xor %%r9d, %%r9d \n\t"
> - "xor %%r10d, %%r10d \n\t"
> - "xor %%r11d, %%r11d \n\t"
> - "xor %%r12d, %%r12d \n\t"
> - "xor %%r13d, %%r13d \n\t"
> - "xor %%r14d, %%r14d \n\t"
> - "xor %%r15d, %%r15d \n\t"
> #endif
> -
> - "xor %%eax, %%eax \n\t"
> - "xor %%ebx, %%ebx \n\t"
> - "xor %%esi, %%esi \n\t"
> - "xor %%edi, %%edi \n\t"
> "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
> ".pushsection .rodata \n\t"
> ".global vmx_return \n\t"
> @@ -10847,6 +10834,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> #endif
> );
>
> + /* Don't let guest register values survive. */
> + asm volatile (
> + ""
> +#ifdef CONFIG_X86_64
> + "xor %%r8d, %%r8d \n\t"
> + "xor %%r9d, %%r9d \n\t"
> + "xor %%r10d, %%r10d \n\t"
> + "xor %%r11d, %%r11d \n\t"
> + "xor %%r12d, %%r12d \n\t"
> + "xor %%r13d, %%r13d \n\t"
> + "xor %%r14d, %%r14d \n\t"
> + "xor %%r15d, %%r15d \n\t"
> +#endif
> + :: "a" (0), "b" (0), "S" (0), "D" (0)
> + : "cc"
> +#ifdef CONFIG_X86_64
> + , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
> +#endif
> + );
> +
Looking at the second asm statement and the comment that precedes it,
my first question would be, "What about the registers not covered
here?" I'm also not convinced that the register-clearing asm statement
is actually "clearer" with some registers cleared as input arguments
and others cleared explicitly, but otherwise, the change looks fine to
me.
Reviewed-by: Jim Mattson <[email protected]>
On Wed, Oct 24, 2018 at 1:28 AM, Julian Stecklina <[email protected]> wrote:
> The VM entry/exit path is a giant inline assembly statement. Simplify it
> by doing the CR2 context switching in plain C. Move the guest CR2 save on
> VM exit behind IBRS clearing, so we reduce the amount of code we execute
> with IBRS on.
>
> Signed-off-by: Julian Stecklina <[email protected]>
> Reviewed-by: Jan H. Schönherr <[email protected]>
> Reviewed-by: Konrad Jan Miller <[email protected]>
Reviewed-by: Jim Mattson <[email protected]>
On Thu, 2018-10-25 at 09:55 -0700, Jim Mattson wrote:
> Looking at the second asm statement and the comment that precedes it,
> my first question would be, "What about the registers not covered
> here?"
Good point. I'll make the comment a bit clearer.
> I'm also not convinced that the register-clearing asm statement
> is actually "clearer" with some registers cleared as input arguments
> and others cleared explicitly, but otherwise, the change looks fine
> to
> me.
The inline assembly itself may not be clearer, but it has a single
purpose now. You are free to ignore it if you want to change aspects of
the VM entry path. It almost fits on a single screen now. ;)
>
> Reviewed-by: Jim Mattson <[email protected]>
Thanks!
Julian
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
On Wed, 2018-10-24 at 10:44 -0700, Eric Northup wrote:
> This loses the exception handling from __ex* ->
> ____kvm_handle_fault_on_reboot.
>
> If deliberate, this should be called out in changelog. Has the race
> which commit 4ecac3fd fixed been mitigated otherwise?
No, this was not deliberate. Will fix.
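A fix would presumably keep the __ex() wrapping and just pass it the
mnemonic instead of the .byte define, e.g. for vmclear (untested sketch):

	asm volatile (__ex("vmclear %1") CC_SET(na)
		      : CC_OUT(na) (error) : "m"(phys_addr)
		      : "memory");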
Thanks!
Julian
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
On Fri, Oct 26, 2018 at 10:46:27AM +0000, Stecklina, Julian wrote:
> On Wed, 2018-10-24 at 10:44 -0700, Eric Northup wrote:
> > This loses the exception handling from __ex* ->
> > ____kvm_handle_fault_on_reboot.
> >
> > If deliberate, this should be called out in changelog. Has the race
> > which commit 4ecac3fd fixed been mitigated otherwise?
>
> No, this was not deliberate. Will fix.
This is already in kvm/next as commit 4b1e54786e48 ("KVM/x86: Use
assembly instruction mnemonics instead of .byte streams"), so this patch
can be dropped.
On Wed, Oct 24, 2018 at 10:28:57AM +0200, Julian Stecklina wrote:
> Split the security related register clearing out of the large inline
> assembly VM entry path. This results in two slightly less complicated
> inline assembly statements, where it is clearer what each one does.
>
> Signed-off-by: Julian Stecklina <[email protected]>
> Reviewed-by: Jan H. Schönherr <[email protected]>
> Reviewed-by: Konrad Jan Miller <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 33 ++++++++++++++++++++-------------
> 1 file changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 93562d5..9225099 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10797,20 +10797,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> "mov %%r13, %c[r13](%0) \n\t"
> "mov %%r14, %c[r14](%0) \n\t"
> "mov %%r15, %c[r15](%0) \n\t"
> - "xor %%r8d, %%r8d \n\t"
> - "xor %%r9d, %%r9d \n\t"
> - "xor %%r10d, %%r10d \n\t"
> - "xor %%r11d, %%r11d \n\t"
> - "xor %%r12d, %%r12d \n\t"
> - "xor %%r13d, %%r13d \n\t"
> - "xor %%r14d, %%r14d \n\t"
> - "xor %%r15d, %%r15d \n\t"
> #endif
> -
> - "xor %%eax, %%eax \n\t"
> - "xor %%ebx, %%ebx \n\t"
> - "xor %%esi, %%esi \n\t"
> - "xor %%edi, %%edi \n\t"
> "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
> ".pushsection .rodata \n\t"
> ".global vmx_return \n\t"
> @@ -10847,6 +10834,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> #endif
> );
>
> + /* Don't let guest register values survive. */
> + asm volatile (
> + ""
> +#ifdef CONFIG_X86_64
> + "xor %%r8d, %%r8d \n\t"
> + "xor %%r9d, %%r9d \n\t"
> + "xor %%r10d, %%r10d \n\t"
> + "xor %%r11d, %%r11d \n\t"
> + "xor %%r12d, %%r12d \n\t"
> + "xor %%r13d, %%r13d \n\t"
> + "xor %%r14d, %%r14d \n\t"
> + "xor %%r15d, %%r15d \n\t"
> +#endif
> + :: "a" (0), "b" (0), "S" (0), "D" (0)
Since clearing the GPRs exists to mitigate speculation junk, I think
we should keep the explicit XOR zeroing instead of deferring to the
compiler. Explicit XORs will ensure the resulting assembly is the
same regardless of compiler, version, target arch, etc..., whereas the
compiler could theoretically use different zeroing methods[1], e.g. on
my system it generates "mov r32,r32" for EBX, ESI and EDI (loading
from EAX after EAX is zeroed).
And FWIW, I find the original code to be more readable since all GPRs
are zeroed with the same method.
[1] As an aside, I was expecting gcc to generate "xor r32,r32" with
-mtune=sandybridge as sandybridge can do mov elimination on xors
that explicitly zero a register but not on generic reg-to-reg mov,
but I was unable to coerce gcc into using xor.
> + : "cc"
> +#ifdef CONFIG_X86_64
> + , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
> +#endif
> + );
> +
> /*
> * We do not use IBRS in the kernel. If this vCPU has used the
> * SPEC_CTRL MSR it may have left it on; save the value and
> --
> 2.7.4
>
On Fri, Oct 26, 2018 at 8:46 AM, Sean Christopherson
<[email protected]> wrote:
> Since clearing the GPRs exists to mitigate speculation junk, I think
> we should keep the explicit XOR zeroing instead of deferring to the
> compiler. Explicit XORs will ensure the resulting assembly is the
> same regardless of compiler, version, target arch, etc..., whereas the
> compiler could theoretically use different zeroing methods[1], e.g. on
> my system it generates "mov r32,r32" for EBX, ESI and EDI (loading
> from EAX after EAX is zeroed).
>
> And FWIW, I find the original code to be more readable since all GPRs
> are zeroed with the same method.
I concur. I really do prefer the explicit xors to the input arguments.
On Fri, 2018-10-26 at 09:30 -0700, Jim Mattson wrote:
> On Fri, Oct 26, 2018 at 8:46 AM, Sean Christopherson
> <[email protected]> wrote:
>
> > And FWIW, I find the original code to be more readable since all
> > GPRs
> > are zeroed with the same method.
> I concur. I really do prefer the explicit xors to the input
> arguments.
ACK. Will re-post with xor for everything.
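Roughly along these lines (untested sketch; same placement as the asm
statement in the patch above):

	/* Don't let guest register values survive. */
	asm volatile (
		"xor %%eax, %%eax \n\t"
		"xor %%ebx, %%ebx \n\t"
		"xor %%esi, %%esi \n\t"
		"xor %%edi, %%edi \n\t"
#ifdef CONFIG_X86_64
		"xor %%r8d,  %%r8d  \n\t"
		"xor %%r9d,  %%r9d  \n\t"
		"xor %%r10d, %%r10d \n\t"
		"xor %%r11d, %%r11d \n\t"
		"xor %%r12d, %%r12d \n\t"
		"xor %%r13d, %%r13d \n\t"
		"xor %%r14d, %%r14d \n\t"
		"xor %%r15d, %%r15d \n\t"
#endif
		: : : "cc", "eax", "ebx", "esi", "edi"
#ifdef CONFIG_X86_64
		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#endif
		);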
Julian
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B