The FS and userspace GS bases are available in current->thread, while the
kernel GS base is a percpu variable. Skip the expensive rdmsr and just
get the values from memory.
Signed-off-by: Paolo Bonzini <[email protected]>
---
v1->v2: hide the accessor for 32-bit kernels
arch/x86/include/asm/desc.h | 8 ++++++++
arch/x86/include/asm/kvm_host.h | 10 ----------
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/vmx.c | 17 +++--------------
5 files changed, 13 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..6ef8c47d0baa 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -85,6 +85,14 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}
+#ifdef CONFIG_X86_64
+/* Provide the current kernel GS base. */
+static inline void *get_current_kernel_gs_base(void)
+{
+ return this_cpu_ptr(irq_stack_union.gs_base);
+}
+#endif
+
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 842e7c31b53d..a965086c99ce 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1303,16 +1303,6 @@ static inline void kvm_load_ldt(u16 sel)
asm("lldt %0" : : "rm"(sel));
}
-#ifdef CONFIG_X86_64
-static inline unsigned long read_msr(unsigned long msr)
-{
- u64 value;
-
- rdmsrl(msr, value);
- return value;
-}
-#endif
-
static inline u32 get_rdx_init_val(void)
{
return 0x600; /* P6 family */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..83f0ddcf7bfc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1340,6 +1340,7 @@ static __init int setup_clearcpuid(char *arg)
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
/*
* The following percpu variables are hot. Align current_task to
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3dbe690c8821..699113cf410e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1721,7 +1721,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
#ifdef CONFIG_X86_64
- rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
+ to_svm(vcpu)->host.gs_base = (u64) get_current_kernel_gs_base();
#endif
savesegment(fs, svm->host.fs);
savesegment(gs, svm->host.gs);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 30e6115d4f09..1ab72ab16b03 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2188,15 +2188,14 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
- vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+ vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+ vmcs_writel(HOST_GS_BASE, (u64) get_current_kernel_gs_base());
#else
vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif
#ifdef CONFIG_X86_64
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
if (boot_cpu_has(X86_FEATURE_MPX))
@@ -2235,7 +2234,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#endif
invalidate_tss_limit();
#ifdef CONFIG_X86_64
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
@@ -5698,9 +5697,6 @@ static void ept_set_mmio_spte_mask(void)
*/
static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
-#ifdef CONFIG_X86_64
- unsigned long a;
-#endif
int i;
if (enable_shadow_vmcs) {
@@ -5749,15 +5745,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
vmx_set_constant_host_state(vmx);
-#ifdef CONFIG_X86_64
- rdmsrl(MSR_FS_BASE, a);
- vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
- rdmsrl(MSR_GS_BASE, a);
- vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
-#else
vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
-#endif
if (cpu_has_vmx_vmfunc())
vmcs_write64(VM_FUNCTION_CONTROL, 0);
--
1.8.3.1
> On Jan 2, 2018, at 5:59 AM, Paolo Bonzini <[email protected]> wrote:
>
> The FS and userspace GS bases are available in current->thread, while the
> kernel GS base is a percpu variable. Skip the expensive rdmsr and just
> get the values from memory.
That fsbase change is wrong: thread->fsbase is not guaranteed to be
correct for current.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> v1->v2: hide the accessor for 32-bit kernels
>
> arch/x86/include/asm/desc.h | 8 ++++++++
> arch/x86/include/asm/kvm_host.h | 10 ----------
> arch/x86/kernel/cpu/common.c | 1 +
> arch/x86/kvm/svm.c | 2 +-
> arch/x86/kvm/vmx.c | 17 +++--------------
> 5 files changed, 13 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 4011cb03ef08..6ef8c47d0baa 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -85,6 +85,14 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
> return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
> }
>
> +#ifdef CONFIG_X86_64
> +/* Provide the current kernel GS base. */
> +static inline void *get_current_kernel_gs_base(void)
> +{
> + return this_cpu_ptr(irq_stack_union.gs_base);
> +}
> +#endif
This is an awful name because MSR_KERNEL_GS_BASE means the user gs
base. How about calling it something like
get_this_cpu_kernelmode_gs_base() or similar?
> #ifdef CONFIG_X86_64
> - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
> - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
> + vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
That's wrong. thread->fsbase isn't kept up to date while the thread
is running. You could potentially try to expose an interface to get
save_base_legacy() called to update it.
On 03/01/2018 23:20, Andy Lutomirski wrote:
>> On Jan 2, 2018, at 5:59 AM, Paolo Bonzini <[email protected]> wrote:
>>
>> The FS and userspace GS bases are available in current->thread, while the
>> kernel GS base is a percpu variable. Skip the expensive rdmsr and just
>> get the values from memory.
>
> That fsbase change is wrong: thread->fsbase is not guaranteed to be
> correct for current.
Note that the value I'm storing in HOST_FS_BASE and HOST_GS_BASE is only
used if FS/GS selector is zero. If FS/GS selector is not zero, it is
not used. Does that avoid this issue?
Certainly worth a comment or clearer code though.
>> +#ifdef CONFIG_X86_64
>> +/* Provide the current kernel GS base. */
>> +static inline void *get_current_kernel_gs_base(void)
>> +{
>> + return this_cpu_ptr(irq_stack_union.gs_base);
>> +}
>> +#endif
>
> This is an awful name because MSR_KERNEL_GS_BASE means the user gs
> base. How about calling it something like
> get_this_cpu_kernelmode_gs_base() or similar?
True, I'll adopt your name.
Paolo
>> #ifdef CONFIG_X86_64
>> - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
>> - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
>> + vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
>
> That's wrong. thread->fsbase isn't kept up to date while the thread
> is running. You could potentially try to expose an interface to get
> save_base_legacy() called to update it.
>
> On Jan 3, 2018, at 3:42 PM, Paolo Bonzini <[email protected]> wrote:
>
> On 03/01/2018 23:20, Andy Lutomirski wrote:
>>> On Jan 2, 2018, at 5:59 AM, Paolo Bonzini <[email protected]> wrote:
>>>
>>> The FS and userspace GS bases are available in current->thread, while the
>>> kernel GS base is a percpu variable. Skip the expensive rdmsr and just
>>> get the values from memory.
>>
>> That fsbase change is wrong: thread->fsbase is not guaranteed to be
>> correct for current.
>
> Note that the value I'm storing in HOST_FS_BASE and HOST_GS_BASE is only
> used if FS/GS selector is zero. If FS/GS selector is not zero, it is
> not used. Does that avoid this issue?
>
I'm not convinced that this is correct. It's not obviously a security problem in the context of KVM, but a lot of state can leak this way.
My general preference would be to make the code obviously fully reload the host state.
I'll try to look at it for real soon.
> Certainly worth a comment or clearer code though.
>
>>> +#ifdef CONFIG_X86_64
>>> +/* Provide the current kernel GS base. */
>>> +static inline void *get_current_kernel_gs_base(void)
>>> +{
>>> + return this_cpu_ptr(irq_stack_union.gs_base);
>>> +}
>>> +#endif
>>
>> This is an awful name because MSR_KERNEL_GS_BASE means the user gs
>> base. How about calling it something like
>> get_this_cpu_kernelmode_gs_base() or similar?
>
> True, I'll adopt your name.
>
> Paolo
>
>>> #ifdef CONFIG_X86_64
>>> - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
>>> - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
>>> + vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
>>
>> That's wrong. thread->fsbase isn't kept up to date while the thread
>> is running. You could potentially try to expose an interface to get
>> save_base_legacy() called to update it.
>>
>
On 04/01/2018 01:16, Andy Lutomirski wrote:
>> Note that the value I'm storing in HOST_FS_BASE and HOST_GS_BASE is
>> only used if FS/GS selector is zero. If FS/GS selector is not
>> zero, it is not used. Does that avoid this issue?
>>
> I'm not convinced that this is correct. It's not obviously a
> security problem in the context of KVM, but a lot of state can leak
> this way.
>
> My general preference would be to make the code obviously fully
> reload the host state.
I'll try to write a v3 that looks more obviously correct.
Paolo