2017-08-23 21:26:13

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH 0/2] KVM, pkeys: fix handling of PKRU across migration

The host pkru is restored right after vcpu exit (commit 1be0e61), so
KVM_GET_XSAVE will return the host PKRU value instead. In general,
the PKRU value in vcpu->arch.guest_fpu.state cannot be trusted.

The first patch removes an unnecessary abstraction. The second
fixes the bug.

Please test the patches, as I don't have the affected hardware.

Paolo

Paolo Bonzini (2):
KVM: x86: simplify handling of PKRU
KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state

arch/x86/include/asm/fpu/internal.h | 6 +++---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 5 -----
arch/x86/kvm/mmu.h | 2 +-
arch/x86/kvm/svm.c | 7 -------
arch/x86/kvm/vmx.c | 23 ++++++-----------------
arch/x86/kvm/x86.c | 17 ++++++++++++++---
7 files changed, 25 insertions(+), 36 deletions(-)

--
1.8.3.1


2017-08-23 21:26:18

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH 2/2] KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state

The host pkru is restored right after vcpu exit (commit 1be0e61), so
KVM_GET_XSAVE will return the host PKRU value instead. Fix this by
using the guest PKRU explicitly in fill_xsave and load_xsave. This
part is based on a patch by Junkang Fu.

The host PKRU data may also not match the value in vcpu->arch.guest_fpu.state,
because it could have been changed by userspace since the last time
it was saved, so skip loading it in kvm_load_guest_fpu.

Reported-by: Junkang Fu <[email protected]>
Cc: Yang Zhang <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/fpu/internal.h | 6 +++---
arch/x86/kvm/x86.c | 17 ++++++++++++++---
2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..554cdb205d17 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
return 0;
}

-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
if (use_xsave()) {
- copy_kernel_to_xregs(&fpstate->xsave, -1);
+ copy_kernel_to_xregs(&fpstate->xsave, mask);
} else {
if (use_fxsr())
copy_kernel_to_fxregs(&fpstate->fxsave);
@@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
: : [addr] "m" (fpstate));
}

- __copy_kernel_to_fpregs(fpstate);
+ __copy_kernel_to_fpregs(fpstate, -1);
}

extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 55c709531eb9..f0e64801b457 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3265,7 +3265,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
- memcpy(dest + offset, src, size);
+ if (feature == XFEATURE_MASK_PKRU)
+ memcpy(dest + offset, &vcpu->arch.pkru,
+ sizeof(vcpu->arch.pkru));
+ else
+ memcpy(dest + offset, src, size);
+
}

valid -= feature;
@@ -3303,7 +3308,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
- memcpy(dest, src + offset, size);
+ if (feature == XFEATURE_MASK_PKRU)
+ memcpy(&vcpu->arch.pkru, src + offset,
+ sizeof(vcpu->arch.pkru));
+ else
+ memcpy(dest, src + offset, size);
}

valid -= feature;
@@ -7651,7 +7660,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
*/
vcpu->guest_fpu_loaded = 1;
__kernel_fpu_begin();
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
+ /* PKRU is separately restored in kvm_x86_ops->run. */
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+ ~XFEATURE_MASK_PKRU);
trace_kvm_fpu(1);
}

--
1.8.3.1

2017-08-23 21:26:39

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH 1/2] KVM: x86: simplify handling of PKRU

Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a
simple comparison against the host value of the register.

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 5 -----
arch/x86/kvm/mmu.h | 2 +-
arch/x86/kvm/svm.c | 7 -------
arch/x86/kvm/vmx.c | 23 ++++++-----------------
5 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 643308143bea..beb185361045 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -491,6 +491,7 @@ struct kvm_vcpu_arch {
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
+ u32 pkru;
u32 hflags;
u64 efer;
u64 apic_base;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2595f9..e1e89ee4af75 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}

-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
- return kvm_x86_ops->get_pkru(vcpu);
-}
-
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags |= HF_GUEST_MASK;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 3ed6192d93b1..1b3095264fcf 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -168,7 +168,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* index of the protection domain, so pte_pkey * 2 is
* is the index of the first bit for the domain.
*/
- pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+ pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;

/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
offset = (pfec & ~1) +
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 32c8d8f62985..f2355135164c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1826,11 +1826,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}

-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
@@ -5438,8 +5433,6 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,

- .get_pkru = svm_get_pkru,
-
.tlb_flush = svm_flush_tlb,

.run = svm_vcpu_run,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ddabed8425b3..c603f658c42a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -639,8 +639,6 @@ struct vcpu_vmx {

u64 current_tsc_ratio;

- bool guest_pkru_valid;
- u32 guest_pkru;
u32 host_pkru;

/*
@@ -2392,11 +2390,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
}

-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
- return to_vmx(vcpu)->guest_pkru;
-}
-
static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -9239,8 +9232,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);

- if (vmx->guest_pkru_valid)
- __write_pkru(vmx->guest_pkru);
+ if (static_cpu_has(X86_FEATURE_OSPKE) &&
+ vcpu->arch.pkru != vmx->host_pkru)
+ __write_pkru(vcpu->arch.pkru);

atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
@@ -9388,13 +9382,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* back on host, so it is safe to read guest PKRU from current
* XSAVE.
*/
- if (boot_cpu_has(X86_FEATURE_OSPKE)) {
- vmx->guest_pkru = __read_pkru();
- if (vmx->guest_pkru != vmx->host_pkru) {
- vmx->guest_pkru_valid = true;
+ if (static_cpu_has(X86_FEATURE_OSPKE)) {
+ vcpu->arch.pkru = __read_pkru();
+ if (vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
- } else
- vmx->guest_pkru_valid = false;
}

/*
@@ -11875,8 +11866,6 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,

- .get_pkru = vmx_get_pkru,
-
.tlb_flush = vmx_flush_tlb,

.run = vmx_vcpu_run,
--
1.8.3.1


2017-08-24 09:09:27

by Yang Zhang

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: x86: simplify handling of PKRU

On 2017/8/24 5:26, Paolo Bonzini wrote:
> Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a
> simple comparison against the host value of the register.

Thanks for refine the patches.:)

>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/kvm_cache_regs.h | 5 -----
> arch/x86/kvm/mmu.h | 2 +-
> arch/x86/kvm/svm.c | 7 -------
> arch/x86/kvm/vmx.c | 23 ++++++-----------------
> 5 files changed, 8 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 643308143bea..beb185361045 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -491,6 +491,7 @@ struct kvm_vcpu_arch {
> unsigned long cr4;
> unsigned long cr4_guest_owned_bits;
> unsigned long cr8;
> + u32 pkru;
> u32 hflags;
> u64 efer;
> u64 apic_base;
> diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
> index 762cdf2595f9..e1e89ee4af75 100644
> --- a/arch/x86/kvm/kvm_cache_regs.h
> +++ b/arch/x86/kvm/kvm_cache_regs.h
> @@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
> | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
> }
>
> -static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
> -{
> - return kvm_x86_ops->get_pkru(vcpu);
> -}
> -
> static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
> {
> vcpu->arch.hflags |= HF_GUEST_MASK;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 3ed6192d93b1..1b3095264fcf 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -168,7 +168,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> * index of the protection domain, so pte_pkey * 2 is
> * is the index of the first bit for the domain.
> */
> - pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
> + pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
>
> /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
> offset = (pfec & ~1) +
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 32c8d8f62985..f2355135164c 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1826,11 +1826,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
> to_svm(vcpu)->vmcb->save.rflags = rflags;
> }
>
> -static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
> -{
> - return 0;
> -}
> -
> static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
> {
> switch (reg) {
> @@ -5438,8 +5433,6 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
> .get_rflags = svm_get_rflags,
> .set_rflags = svm_set_rflags,
>
> - .get_pkru = svm_get_pkru,
> -
> .tlb_flush = svm_flush_tlb,
>
> .run = svm_vcpu_run,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ddabed8425b3..c603f658c42a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -639,8 +639,6 @@ struct vcpu_vmx {
>
> u64 current_tsc_ratio;
>
> - bool guest_pkru_valid;
> - u32 guest_pkru;
> u32 host_pkru;
>
> /*
> @@ -2392,11 +2390,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
> to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
> }
>
> -static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
> -{
> - return to_vmx(vcpu)->guest_pkru;
> -}
> -
> static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
> {
> u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
> @@ -9239,8 +9232,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> vmx_set_interrupt_shadow(vcpu, 0);
>
> - if (vmx->guest_pkru_valid)
> - __write_pkru(vmx->guest_pkru);
> + if (static_cpu_has(X86_FEATURE_OSPKE) &&

We expose protection key to VM without check whether OSPKE is enabled or
not. Why not check guest's cpuid here which also can avoid unnecessary
access to pkru?

> + vcpu->arch.pkru != vmx->host_pkru)
> + __write_pkru(vcpu->arch.pkru);
>
> atomic_switch_perf_msrs(vmx);
> debugctlmsr = get_debugctlmsr();
> @@ -9388,13 +9382,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> * back on host, so it is safe to read guest PKRU from current
> * XSAVE.
> */
> - if (boot_cpu_has(X86_FEATURE_OSPKE)) {
> - vmx->guest_pkru = __read_pkru();
> - if (vmx->guest_pkru != vmx->host_pkru) {
> - vmx->guest_pkru_valid = true;
> + if (static_cpu_has(X86_FEATURE_OSPKE)) {
> + vcpu->arch.pkru = __read_pkru();
> + if (vcpu->arch.pkru != vmx->host_pkru)
> __write_pkru(vmx->host_pkru);
> - } else
> - vmx->guest_pkru_valid = false;
> }
>
> /*
> @@ -11875,8 +11866,6 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
> .get_rflags = vmx_get_rflags,
> .set_rflags = vmx_set_rflags,
>
> - .get_pkru = vmx_get_pkru,
> -
> .tlb_flush = vmx_flush_tlb,
>
> .run = vmx_vcpu_run,
>


--
Yang
Alibaba Cloud Computing

2017-08-24 09:19:38

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: x86: simplify handling of PKRU

On 24/08/2017 11:09, Yang Zhang wrote:
>> + if (static_cpu_has(X86_FEATURE_OSPKE) &&
>
> We expose protection key to VM without check whether OSPKE is enabled or
> not. Why not check guest's cpuid here which also can avoid unnecessary
> access to pkru?

Checking guest CPUID is pretty slow. We could check CR4.PKE though.

Also, using static_cpu_has with OSPKE is probably wrong. But if we do
check CR4.PKE, we can check X86_FEATURE_PKU instead, so something like

if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)

... but then, kvm_read_cr4_bits is also pretty slow---and we don't
really need it, since all CR4 writes cause a vmexit. So for now I'd
stay with this patch, only s/static_cpu_has/boot_cpu_has/g.

Of course you can send improvements on top!

Paolo

>> + vcpu->arch.pkru != vmx->host_pkru)
>> + __write_pkru(vcpu->arch.pkru);

2017-08-24 10:06:08

by Yang Zhang

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: x86: simplify handling of PKRU

On 2017/8/24 17:19, Paolo Bonzini wrote:
> On 24/08/2017 11:09, Yang Zhang wrote:
>>> + if (static_cpu_has(X86_FEATURE_OSPKE) &&
>>
>> We expose protection key to VM without check whether OSPKE is enabled or
>> not. Why not check guest's cpuid here which also can avoid unnecessary
>> access to pkru?
>
> Checking guest CPUID is pretty slow. We could check CR4.PKE though.
>
> Also, using static_cpu_has with OSPKE is probably wrong. But if we do
> check CR4.PKE, we can check X86_FEATURE_PKU instead, so something like
>
> if (static_cpu_has(X86_FEATURE_PKU) &&
> kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> vcpu->arch.pkru != vmx->host_pkru)
>
> ... but then, kvm_read_cr4_bits is also pretty slow---and we don't
> really need it, since all CR4 writes cause a vmexit. So for now I'd
> stay with this patch, only s/static_cpu_has/boot_cpu_has/g.
>
> Of course you can send improvements on top!

ok, since most OS distributions don't support protection key so far
which means vcpu->arch.pkru always 0 in it and i remember host_pkru will
be set to 55555554 be default. To avoid the unnecessary access to pkru,
how about the following change:

@@ -4338,6 +4331,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu,
unsigned long cr4)
return 1;
}

+ if (cr4 & X86_CR4_PKE)
+ to_vmx(vcpu)->guest_pkru_valid = true;
+
if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;

@@ -9020,8 +9016,10 @@ static void __noclone vmx_vcpu_run(struct
kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);

- if (vmx->guest_pkru_valid)
- __write_pkru(vmx->guest_pkru);
+ if (static_cpu_has(X86_FEATURE_OSPKE) &&
+ vmx->guest_pkru_valid &&
+ vcpu->arch.pkru != vmx->host_pkru)
+ __write_pkru(vcpu->arch.pkru);

atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
@@ -9169,13 +9167,11 @@ static void __noclone vmx_vcpu_run(struct
kvm_vcpu *vcpu)
* back on host, so it is safe to read guest PKRU from current
* XSAVE.
*/
- if (boot_cpu_has(X86_FEATURE_OSPKE)) {
- vmx->guest_pkru = __read_pkru();
- if (vmx->guest_pkru != vmx->host_pkru) {
- vmx->guest_pkru_valid = true;
+ if (static_cpu_has(X86_FEATURE_OSPKE) &&
+ vmx->guest_pkru_valid) {
+ vcpu->arch.pkru = __read_pkru();
+ if (vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
- } else
- vmx->guest_pkru_valid = false;
}

/*

--
Yang
Alibaba Cloud Computing

2017-08-24 10:15:01

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 1/2] KVM: x86: simplify handling of PKRU

On 24/08/2017 12:05, Yang Zhang wrote:
> On 2017/8/24 17:19, Paolo Bonzini wrote:
>> On 24/08/2017 11:09, Yang Zhang wrote:
>>>> + if (static_cpu_has(X86_FEATURE_OSPKE) &&
>>>
>>> We expose protection key to VM without check whether OSPKE is enabled or
>>> not. Why not check guest's cpuid here which also can avoid unnecessary
>>> access to pkru?
>>
>> Checking guest CPUID is pretty slow. We could check CR4.PKE though.
>>
>> Also, using static_cpu_has with OSPKE is probably wrong. But if we do
>> check CR4.PKE, we can check X86_FEATURE_PKU instead, so something like
>>
>> if (static_cpu_has(X86_FEATURE_PKU) &&
>> kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
>> vcpu->arch.pkru != vmx->host_pkru)
>>
>> ... but then, kvm_read_cr4_bits is also pretty slow---and we don't
>> really need it, since all CR4 writes cause a vmexit. So for now I'd
>> stay with this patch, only s/static_cpu_has/boot_cpu_has/g.
>>
>> Of course you can send improvements on top!
>
> ok, since most OS distributions don't support protection key so far
> which means vcpu->arch.pkru always 0 in it and i remember host_pkru will
> be set to 55555554 be default. To avoid the unnecessary access to pkru,
> how about the following change:

Even better: reading guest's CR4.PKE is actually _not_ slow because
X86_CR4_PKE is not part of KVM_POSSIBLE_CR4_GUEST_BITS. So
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) is compiled to just "vcpu->arch.cr4
& X86_CR4_PKE".

We need to be careful though to disable guest PKU if host OSPKE is off,
because otherwise __read_pkru and __write_pkru cause a #GP.

I've sent v2 of the series now, incorporating your suggestion. Thanks!

Paolo

> @@ -4338,6 +4331,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu,
> unsigned long cr4)
> return 1;
> }
>
> + if (cr4 & X86_CR4_PKE)
> + to_vmx(vcpu)->guest_pkru_valid = true;
> +
> if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
> return 1;
>
> @@ -9020,8 +9016,10 @@ static void __noclone vmx_vcpu_run(struct
> kvm_vcpu *vcpu)
> if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> vmx_set_interrupt_shadow(vcpu, 0);
>
> - if (vmx->guest_pkru_valid)
> - __write_pkru(vmx->guest_pkru);
> + if (static_cpu_has(X86_FEATURE_OSPKE) &&
> + vmx->guest_pkru_valid &&
> + vcpu->arch.pkru != vmx->host_pkru)
> + __write_pkru(vcpu->arch.pkru);
>
> atomic_switch_perf_msrs(vmx);
> debugctlmsr = get_debugctlmsr();
> @@ -9169,13 +9167,11 @@ static void __noclone vmx_vcpu_run(struct
> kvm_vcpu *vcpu)
> * back on host, so it is safe to read guest PKRU from current
> * XSAVE.
> */
> - if (boot_cpu_has(X86_FEATURE_OSPKE)) {
> - vmx->guest_pkru = __read_pkru();
> - if (vmx->guest_pkru != vmx->host_pkru) {
> - vmx->guest_pkru_valid = true;
> + if (static_cpu_has(X86_FEATURE_OSPKE) &&
> + vmx->guest_pkru_valid) {
> + vcpu->arch.pkru = __read_pkru();
> + if (vcpu->arch.pkru != vmx->host_pkru)
> __write_pkru(vmx->host_pkru);
> - } else
> - vmx->guest_pkru_valid = false;
> }
>
> /*
>