2006-12-28 10:07:21

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 0/8] KVM updates for 2.6.20-rc2

Compatibility and stabilization fixes, many centered around msrs, but a
few other corner cases as well.

--
error compiling committee.c: too many arguments to function


2006-12-28 10:08:19

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 1/8] KVM: Use boot_cpu_data instead of current_cpu_data

current_cpu_data invokes smp_processor_id(), which is inadvisable when
preemption is enabled. Switch to boot_cpu_data instead.

Resolves sourceforge bug 1621401.

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -246,7 +246,7 @@ static int has_svm(void)
{
uint32_t eax, ebx, ecx, edx;

- if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
printk(KERN_INFO "has_svm: not amd\n");
return 0;
}

2006-12-28 10:09:20

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 2/8] KVM: Simplify is_long_mode()

Instead of doing tricky stuff with the arch dependent virtualization registers,
take a peek at the guest's efer.

This simlifies some code, and fixes some confusion in the mmu branch.

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/mmu.c
===================================================================
--- linux-2.6.orig/drivers/kvm/mmu.c
+++ linux-2.6/drivers/kvm/mmu.c
@@ -578,7 +578,7 @@ static int init_kvm_mmu(struct kvm_vcpu

if (!is_paging(vcpu))
return nonpaging_init_context(vcpu);
- else if (kvm_arch_ops->is_long_mode(vcpu))
+ else if (is_long_mode(vcpu))
return paging64_init_context(vcpu);
else if (is_pae(vcpu))
return paging32E_init_context(vcpu);
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -398,7 +398,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsi
return;
}

- if (kvm_arch_ops->is_long_mode(vcpu)) {
+ if (is_long_mode(vcpu)) {
if (!(cr4 & CR4_PAE_MASK)) {
printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
"in long mode\n");
@@ -425,7 +425,7 @@ EXPORT_SYMBOL_GPL(set_cr4);

void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
- if (kvm_arch_ops->is_long_mode(vcpu)) {
+ if (is_long_mode(vcpu)) {
if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
inject_gp(vcpu);
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -278,7 +278,6 @@ struct kvm_arch_ops {
struct kvm_segment *var, int seg);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
- int (*is_long_mode)(struct kvm_vcpu *vcpu);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
@@ -403,6 +402,15 @@ static inline struct page *_gfn_to_page(
return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
}

+static inline int is_long_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+ return vcpu->shadow_efer & EFER_LME;
+#else
+ return 0;
+#endif
+}
+
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & CR4_PAE_MASK;
Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -166,11 +166,6 @@ static inline void write_dr7(unsigned lo
asm volatile ("mov %0, %%dr7" :: "r" (val));
}

-static inline int svm_is_long_mode(struct kvm_vcpu *vcpu)
-{
- return vcpu->svm->vmcb->save.efer & KVM_EFER_LMA;
-}
-
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
vcpu->svm->asid_generation--;
@@ -1609,7 +1604,6 @@ static struct kvm_arch_ops svm_arch_ops
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
- .is_long_mode = svm_is_long_mode,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
.set_cr0 = svm_set_cr0,
.set_cr0_no_modeswitch = svm_set_cr0,
Index: linux-2.6/drivers/kvm/paging_tmpl.h
===================================================================
--- linux-2.6.orig/drivers/kvm/paging_tmpl.h
+++ linux-2.6/drivers/kvm/paging_tmpl.h
@@ -68,7 +68,7 @@ static void FNAME(init_walker)(struct gu
hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK);
walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);

- ASSERT((!kvm_arch_ops->is_long_mode(vcpu) && is_pae(vcpu)) ||
+ ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);

walker->table = (pt_element_t *)( (unsigned long)walker->table |
@@ -131,7 +131,7 @@ static pt_element_t *FNAME(fetch_guest)(
(walker->table[index] & PT_PAGE_SIZE_MASK) &&
(PTTYPE == 64 || is_pse(vcpu))))
return &walker->table[index];
- if (walker->level != 3 || kvm_arch_ops->is_long_mode(vcpu))
+ if (walker->level != 3 || is_long_mode(vcpu))
walker->inherited_ar &= walker->table[index];
paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK);
kunmap_atomic(walker->table, KM_USER0);
Index: linux-2.6/drivers/kvm/vmx.c
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.c
+++ linux-2.6/drivers/kvm/vmx.c
@@ -900,11 +900,6 @@ static void vmx_set_segment(struct kvm_v
vmcs_write32(sf->ar_bytes, ar);
}

-static int vmx_is_long_mode(struct kvm_vcpu *vcpu)
-{
- return vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_CONTROLS_IA32E_MASK;
-}
-
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
@@ -1975,7 +1970,6 @@ static struct kvm_arch_ops vmx_arch_ops
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
- .is_long_mode = vmx_is_long_mode,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.set_cr0 = vmx_set_cr0,
.set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,

2006-12-28 10:11:19

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 4/8] KVM: Implement a few system configuration msrs

Resolves sourceforge bug 1622229 (guest crashes running benchmark software).

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -1068,6 +1068,9 @@ static int emulate_on_interception(struc
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
switch (ecx) {
+ case 0xc0010010: /* SYSCFG */
+ case 0xc0010015: /* HWCR */
+ case MSR_IA32_PLATFORM_ID:
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MC0_CTL:
Index: linux-2.6/drivers/kvm/vmx.c
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.c
+++ linux-2.6/drivers/kvm/vmx.c
@@ -359,6 +359,9 @@ static int vmx_get_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
data = vmcs_read32(GUEST_SYSENTER_ESP);
break;
+ case 0xc0010010: /* SYSCFG */
+ case 0xc0010015: /* HWCR */
+ case MSR_IA32_PLATFORM_ID:
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MC0_CTL:

2006-12-28 10:12:20

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 5/8] KVM: Move common msr handling to arch independent code

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -1103,6 +1103,47 @@ void realmode_set_cr(struct kvm_vcpu *vc
}
}

+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ u64 data;
+
+ switch (msr) {
+ case 0xc0010010: /* SYSCFG */
+ case 0xc0010015: /* HWCR */
+ case MSR_IA32_PLATFORM_ID:
+ case MSR_IA32_P5_MC_ADDR:
+ case MSR_IA32_P5_MC_TYPE:
+ case MSR_IA32_MC0_CTL:
+ case MSR_IA32_MCG_STATUS:
+ case MSR_IA32_MCG_CAP:
+ case MSR_IA32_MC0_MISC:
+ case MSR_IA32_MC0_MISC+4:
+ case MSR_IA32_MC0_MISC+8:
+ case MSR_IA32_MC0_MISC+12:
+ case MSR_IA32_MC0_MISC+16:
+ case MSR_IA32_UCODE_REV:
+ /* MTRR registers */
+ case 0xfe:
+ case 0x200 ... 0x2ff:
+ data = 0;
+ break;
+ case MSR_IA32_APICBASE:
+ data = vcpu->apic_base;
+ break;
+#ifdef CONFIG_X86_64
+ case MSR_EFER:
+ data = vcpu->shadow_efer;
+ break;
+#endif
+ default:
+ printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr);
+ return 1;
+ }
+ *pdata = data;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_get_msr_common);
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -1115,7 +1156,7 @@ static int get_msr(struct kvm_vcpu *vcpu

#ifdef CONFIG_X86_64

-void set_efer(struct kvm_vcpu *vcpu, u64 efer)
+static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & EFER_RESERVED_BITS) {
printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
@@ -1138,10 +1179,36 @@ void set_efer(struct kvm_vcpu *vcpu, u64

vcpu->shadow_efer = efer;
}
-EXPORT_SYMBOL_GPL(set_efer);

#endif

+int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ switch (msr) {
+#ifdef CONFIG_X86_64
+ case MSR_EFER:
+ set_efer(vcpu, data);
+ break;
+#endif
+ case MSR_IA32_MC0_STATUS:
+ printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
+ __FUNCTION__, data);
+ break;
+ case MSR_IA32_UCODE_REV:
+ case MSR_IA32_UCODE_WRITE:
+ case 0x200 ... 0x2ff: /* MTRRs */
+ break;
+ case MSR_IA32_APICBASE:
+ vcpu->apic_base = data;
+ break;
+ default:
+ printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_msr_common);
+
/*
* Writes msr value into into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -374,9 +374,8 @@ void set_cr4(struct kvm_vcpu *vcpu, unsi
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);

-#ifdef CONFIG_X86_64
-void set_efer(struct kvm_vcpu *vcpu, u64 efer);
-#endif
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);

void fx_init(struct kvm_vcpu *vcpu);

Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -1068,25 +1068,6 @@ static int emulate_on_interception(struc
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
switch (ecx) {
- case 0xc0010010: /* SYSCFG */
- case 0xc0010015: /* HWCR */
- case MSR_IA32_PLATFORM_ID:
- case MSR_IA32_P5_MC_ADDR:
- case MSR_IA32_P5_MC_TYPE:
- case MSR_IA32_MC0_CTL:
- case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MCG_CAP:
- case MSR_IA32_MC0_MISC:
- case MSR_IA32_MC0_MISC+4:
- case MSR_IA32_MC0_MISC+8:
- case MSR_IA32_MC0_MISC+12:
- case MSR_IA32_MC0_MISC+16:
- case MSR_IA32_UCODE_REV:
- /* MTRR registers */
- case 0xfe:
- case 0x200 ... 0x2ff:
- *data = 0;
- break;
case MSR_IA32_TIME_STAMP_COUNTER: {
u64 tsc;

@@ -1094,12 +1075,6 @@ static int svm_get_msr(struct kvm_vcpu *
*data = vcpu->svm->vmcb->control.tsc_offset + tsc;
break;
}
- case MSR_EFER:
- *data = vcpu->shadow_efer;
- break;
- case MSR_IA32_APICBASE:
- *data = vcpu->apic_base;
- break;
case MSR_K6_STAR:
*data = vcpu->svm->vmcb->save.star;
break;
@@ -1127,8 +1102,7 @@ static int svm_get_msr(struct kvm_vcpu *
*data = vcpu->svm->vmcb->save.sysenter_esp;
break;
default:
- printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", ecx);
- return 1;
+ return kvm_get_msr_common(vcpu, ecx, data);
}
return 0;
}
@@ -1152,15 +1126,6 @@ static int rdmsr_interception(struct kvm
static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
{
switch (ecx) {
-#ifdef CONFIG_X86_64
- case MSR_EFER:
- set_efer(vcpu, data);
- break;
-#endif
- case MSR_IA32_MC0_STATUS:
- printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
- , __FUNCTION__, data);
- break;
case MSR_IA32_TIME_STAMP_COUNTER: {
u64 tsc;

@@ -1168,13 +1133,6 @@ static int svm_set_msr(struct kvm_vcpu *
vcpu->svm->vmcb->control.tsc_offset = data - tsc;
break;
}
- case MSR_IA32_UCODE_REV:
- case MSR_IA32_UCODE_WRITE:
- case 0x200 ... 0x2ff: /* MTRRs */
- break;
- case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
- break;
case MSR_K6_STAR:
vcpu->svm->vmcb->save.star = data;
break;
@@ -1202,8 +1160,7 @@ static int svm_set_msr(struct kvm_vcpu *
vcpu->svm->vmcb->save.sysenter_esp = data;
break;
default:
- printk(KERN_ERR "kvm: unhandled wrmsr: %x\n", ecx);
- return 1;
+ return kvm_set_msr_common(vcpu, ecx, data);
}
return 0;
}
Index: linux-2.6/drivers/kvm/vmx.c
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.c
+++ linux-2.6/drivers/kvm/vmx.c
@@ -344,8 +344,7 @@ static int vmx_get_msr(struct kvm_vcpu *
data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_EFER:
- data = vcpu->shadow_efer;
- break;
+ return kvm_get_msr_common(vcpu, msr_index, pdata);
#endif
case MSR_IA32_TIME_STAMP_COUNTER:
data = guest_read_tsc();
@@ -359,36 +358,13 @@ static int vmx_get_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
data = vmcs_read32(GUEST_SYSENTER_ESP);
break;
- case 0xc0010010: /* SYSCFG */
- case 0xc0010015: /* HWCR */
- case MSR_IA32_PLATFORM_ID:
- case MSR_IA32_P5_MC_ADDR:
- case MSR_IA32_P5_MC_TYPE:
- case MSR_IA32_MC0_CTL:
- case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MCG_CAP:
- case MSR_IA32_MC0_MISC:
- case MSR_IA32_MC0_MISC+4:
- case MSR_IA32_MC0_MISC+8:
- case MSR_IA32_MC0_MISC+12:
- case MSR_IA32_MC0_MISC+16:
- case MSR_IA32_UCODE_REV:
- /* MTRR registers */
- case 0xfe:
- case 0x200 ... 0x2ff:
- data = 0;
- break;
- case MSR_IA32_APICBASE:
- data = vcpu->apic_base;
- break;
default:
msr = find_msr_entry(vcpu, msr_index);
- if (!msr) {
- printk(KERN_ERR "kvm: unhandled rdmsr: %x\n", msr_index);
- return 1;
+ if (msr) {
+ data = msr->data;
+ break;
}
- data = msr->data;
- break;
+ return kvm_get_msr_common(vcpu, msr_index, pdata);
}

*pdata = data;
@@ -405,6 +381,8 @@ static int vmx_set_msr(struct kvm_vcpu *
struct vmx_msr_entry *msr;
switch (msr_index) {
#ifdef CONFIG_X86_64
+ case MSR_EFER:
+ return kvm_set_msr_common(vcpu, msr_index, data);
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
@@ -421,32 +399,17 @@ static int vmx_set_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
-#ifdef __x86_64
- case MSR_EFER:
- set_efer(vcpu, data);
- break;
- case MSR_IA32_MC0_STATUS:
- printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
- , __FUNCTION__, data);
- break;
-#endif
case MSR_IA32_TIME_STAMP_COUNTER: {
guest_write_tsc(data);
break;
}
- case MSR_IA32_UCODE_REV:
- case MSR_IA32_UCODE_WRITE:
- case 0x200 ... 0x2ff: /* MTRRs */
- break;
- case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
- break;
default:
msr = find_msr_entry(vcpu, msr_index);
- if (!msr) {
- printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr_index);
- return 1;
+ if (msr) {
+ msr->data = data;
+ break;
}
+ return kvm_set_msr_common(vcpu, msr_index, data);
msr->data = data;
break;
}

2006-12-28 10:13:19

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 6/8] KVM: More msr misery

These msrs are referenced by benchmarking software when pretending to be
an Intel cpu.

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -1122,11 +1122,15 @@ int kvm_get_msr_common(struct kvm_vcpu *
case MSR_IA32_MC0_MISC+12:
case MSR_IA32_MC0_MISC+16:
case MSR_IA32_UCODE_REV:
+ case MSR_IA32_PERF_STATUS:
/* MTRR registers */
case 0xfe:
case 0x200 ... 0x2ff:
data = 0;
break;
+ case 0xcd: /* fsb frequency */
+ data = 3;
+ break;
case MSR_IA32_APICBASE:
data = vcpu->apic_base;
break;

2006-12-28 10:14:21

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 7/8] KVM: Rename some msrs

From: Nguyen Anh Quynh <[email protected]>

No need to append _MSR to msr names, a prefix should suffice.

Signed-off-by: Nguyen Anh Quynh <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/vmx.c
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.c
+++ linux-2.6/drivers/kvm/vmx.c
@@ -26,7 +26,6 @@

#include "segment_descriptor.h"

-#define MSR_IA32_FEATURE_CONTROL 0x03a

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -519,11 +518,11 @@ static __init void setup_vmcs_descriptor
{
u32 vmx_msr_low, vmx_msr_high;

- rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
+ rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
vmcs_descriptor.size = vmx_msr_high & 0x1fff;
vmcs_descriptor.order = get_order(vmcs_descriptor.size);
vmcs_descriptor.revision_id = vmx_msr_low;
-};
+}

static struct vmcs *alloc_vmcs_cpu(int cpu)
{
@@ -1039,12 +1038,12 @@ static int vmx_vcpu_setup(struct kvm_vcp
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

/* Control */
- vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS_MSR,
+ vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS,
PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_EXT_INTR_MASK /* 20.6.1 */
| PIN_BASED_NMI_EXITING /* 20.6.1 */
);
- vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS_MSR,
+ vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS,
CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_HLT_EXITING /* 20.6.2 */
| CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
@@ -1127,7 +1126,7 @@ static int vmx_vcpu_setup(struct kvm_vcp
virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS));
- vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CONTROLS,
+ vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
(HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
@@ -1135,7 +1134,7 @@ static int vmx_vcpu_setup(struct kvm_vcp


/* 22.2.1, 20.8.1 */
- vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS_MSR,
+ vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS,
VM_ENTRY_CONTROLS, 0);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */

Index: linux-2.6/drivers/kvm/vmx.h
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.h
+++ linux-2.6/drivers/kvm/vmx.h
@@ -286,11 +286,11 @@ enum vmcs_field {

#define CR4_VMXE 0x2000

-#define MSR_IA32_VMX_BASIC_MSR 0x480
+#define MSR_IA32_VMX_BASIC 0x480
#define MSR_IA32_FEATURE_CONTROL 0x03a
-#define MSR_IA32_VMX_PINBASED_CTLS_MSR 0x481
-#define MSR_IA32_VMX_PROCBASED_CTLS_MSR 0x482
-#define MSR_IA32_VMX_EXIT_CTLS_MSR 0x483
-#define MSR_IA32_VMX_ENTRY_CTLS_MSR 0x484
+#define MSR_IA32_VMX_PINBASED_CTLS 0x481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
+#define MSR_IA32_VMX_EXIT_CTLS 0x483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x484

#endif

2006-12-28 10:15:20

by Avi Kivity

[permalink] [raw]
Subject: [PATCH 8/8] KVM: Fix oops on oom

__free_page() doesn't like a NULL argument, so check before calling it. A
NULL can only happen if memory is exhausted during allocation of a memory
slot.

Signed-off-by: Avi Kivity <[email protected]>

Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -245,7 +245,8 @@ static void kvm_free_physmem_slot(struct
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
for (i = 0; i < free->npages; ++i)
- __free_page(free->phys_mem[i]);
+ if (free->phys_mem[i])
+ __free_page(free->phys_mem[i]);
vfree(free->phys_mem);
}

2006-12-28 12:45:25

by Ingo Molnar

[permalink] [raw]
Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()

Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()
From: Ingo Molnar <[email protected]>

fix a GFP_KERNEL allocation in atomic section bug:
kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
alloc_pages(), while holding the vcpu. The fix is to set up the MMU
state earlier, it does not require a loaded CPU state.

(NOTE: free_vcpus does an kvm_mmu_destroy() call so there's no need
for any extra teardown branch on allocation failure here.)

found in 2.6.20-rc2-rt1.

Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/kvm/kvm_main.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -522,12 +522,12 @@ static int kvm_dev_ioctl_create_vcpu(str
if (r < 0)
goto out_free_vcpus;

- kvm_arch_ops->vcpu_load(vcpu);
+ r = kvm_mmu_init(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;

+ kvm_arch_ops->vcpu_load(vcpu);
r = kvm_arch_ops->vcpu_setup(vcpu);
- if (r >= 0)
- r = kvm_mmu_init(vcpu);
-
vcpu_put(vcpu);

if (r < 0)

2006-12-28 12:56:12

by Avi Kivity

[permalink] [raw]
Subject: Re: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()

Ingo Molnar wrote:
> Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()
> From: Ingo Molnar <[email protected]>
>
> fix a GFP_KERNEL allocation in atomic section bug:
> kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
> alloc_pages(), while holding the vcpu. The fix is to set up the MMU
> state earlier, it does not require a loaded CPU state.
>

Yes it does. It calls nonpaging_init_context() which calls
vmx_set_cr3() which promptly trashes address space of the VM that
previously ran on that vcpu (or, if there were none, logs a vmwrite error).

--
error compiling committee.c: too many arguments to function

2006-12-28 12:58:45

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()


* Avi Kivity <[email protected]> wrote:

> >fix a GFP_KERNEL allocation in atomic section bug:
> >kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
> >alloc_pages(), while holding the vcpu. The fix is to set up the MMU
> >state earlier, it does not require a loaded CPU state.
>
> Yes it does. It calls nonpaging_init_context() which calls
> vmx_set_cr3() which promptly trashes address space of the VM that
> previously ran on that vcpu (or, if there were none, logs a vmwrite
> error).

ok, i missed that. Nevertheless the problem of the nonatomic alloc
remains. I guess a kvm_mmu_init() needs to be split into
kvm_mmu_create() and kvm_mmu_setup()?

Ingo

2006-12-28 13:11:07

by Ingo Molnar

[permalink] [raw]
Subject: [patch, try#2] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()


* Ingo Molnar <[email protected]> wrote:

> > Yes it does. It calls nonpaging_init_context() which calls
> > vmx_set_cr3() which promptly trashes address space of the VM that
> > previously ran on that vcpu (or, if there were none, logs a vmwrite
> > error).
>
> ok, i missed that. Nevertheless the problem of the nonatomic alloc
> remains. I guess a kvm_mmu_init() needs to be split into
> kvm_mmu_create() and kvm_mmu_setup()?

the patch below implements this fix. Lightly tested on 32-bit/VMX on
2.6.20-rc2-rt2 but seems to have done the trick.

Ingo

-------------------->
Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()
From: Ingo Molnar <[email protected]>

fix an GFP_KERNEL allocation in atomic section:
kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
alloc_pages(), while holding the vcpu.

The fix is to set up the MMU state in two phases: kvm_mmu_create() and
kvm_mmu_setup().

(NOTE: free_vcpus does an kvm_mmu_destroy() call so there's no need
for any extra teardown branch on allocation/init failure here.)

Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/kvm/kvm.h | 3 ++-
drivers/kvm/kvm_main.c | 10 ++++++----
drivers/kvm/mmu.c | 24 +++++++++---------------
3 files changed, 17 insertions(+), 20 deletions(-)

Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -319,7 +319,8 @@ int kvm_init_arch(struct kvm_arch_ops *o
void kvm_exit_arch(void);

void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
-int kvm_mmu_init(struct kvm_vcpu *vcpu);
+int kvm_mmu_create(struct kvm_vcpu *vcpu);
+int kvm_mmu_setup(struct kvm_vcpu *vcpu);

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -522,12 +522,14 @@ static int kvm_dev_ioctl_create_vcpu(str
if (r < 0)
goto out_free_vcpus;

- kvm_arch_ops->vcpu_load(vcpu);
+ r = kvm_mmu_create(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;

- r = kvm_arch_ops->vcpu_setup(vcpu);
+ kvm_arch_ops->vcpu_load(vcpu);
+ r = kvm_mmu_setup(vcpu);
if (r >= 0)
- r = kvm_mmu_init(vcpu);
-
+ r = kvm_arch_ops->vcpu_setup(vcpu);
vcpu_put(vcpu);

if (r < 0)
Index: linux/drivers/kvm/mmu.c
===================================================================
--- linux.orig/drivers/kvm/mmu.c
+++ linux/drivers/kvm/mmu.c
@@ -639,28 +639,22 @@ error_1:
return -ENOMEM;
}

-int kvm_mmu_init(struct kvm_vcpu *vcpu)
+int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
- int r;
-
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
ASSERT(list_empty(&vcpu->free_pages));

- r = alloc_mmu_pages(vcpu);
- if (r)
- goto out;
-
- r = init_kvm_mmu(vcpu);
- if (r)
- goto out_free_pages;
+ return alloc_mmu_pages(vcpu);
+}

- return 0;
+int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+{
+ ASSERT(vcpu);
+ ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
+ ASSERT(!list_empty(&vcpu->free_pages));

-out_free_pages:
- free_mmu_pages(vcpu);
-out:
- return r;
+ return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)

2006-12-28 13:14:49

by Avi Kivity

[permalink] [raw]
Subject: Re: [patch, try#2] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()

Ingo Molnar wrote:
> Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()
> From: Ingo Molnar <[email protected]>
>
> fix an GFP_KERNEL allocation in atomic section:
> kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
> alloc_pages(), while holding the vcpu.
>
> The fix is to set up the MMU state in two phases: kvm_mmu_create() and
> kvm_mmu_setup().
>
> (NOTE: free_vcpus does an kvm_mmu_destroy() call so there's no need
> for any extra teardown branch on allocation/init failure here.)
>
> Signed-off-by: Ingo Molnar <[email protected]>
>

Applied, thanks.

--
error compiling committee.c: too many arguments to function

2006-12-28 13:26:27

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch, try#2] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()


* Avi Kivity <[email protected]> wrote:

> Ingo Molnar wrote:
> >Subject: [patch] kvm: fix GFP_KERNEL allocation in atomic section in
> >kvm_dev_ioctl_create_vcpu()
> >From: Ingo Molnar <[email protected]>
> >
> >fix an GFP_KERNEL allocation in atomic section:
> >kvm_dev_ioctl_create_vcpu() called kvm_mmu_init(), which calls
> >alloc_pages(), while holding the vcpu.
> >
> >The fix is to set up the MMU state in two phases: kvm_mmu_create() and
> >kvm_mmu_setup().
> >
> >(NOTE: free_vcpus does an kvm_mmu_destroy() call so there's no need
> > for any extra teardown branch on allocation/init failure here.)
> >
> >Signed-off-by: Ingo Molnar <[email protected]>
> >
>
> Applied, thanks.

great!

I've got a security related question as well: vcpu_load() sets up a
physical CPU's VM registers/state, and vcpu_put() drops that. But
vcpu_put() only does a put_cpu() call - it does not tear down any VM
state that has been loaded into the CPU. Is it guaranteed that (hostile)
user-space cannot use that VM state in any unauthorized way? The state
is still loaded while arbitrary tasks execute on the CPU. The next
vcpu_load() will then override it, but the state lingers around forever.

The new x86 VM instructions: vmclear, vmlaunch, vmresume, vmptrld,
vmread, vmwrite, vmxoff, vmxon are all privileged so i guess it should
be mostly safe - i'm just wondering whether you thought about this
attack angle.

ultimately we want to integrate VM state management into the scheduler
and the context-switch lowlevel arch code, but right now CPU state
management is done by the KVM 'driver' and there's nothing that isolates
other tasks from possible side-effects of a loaded VMX/SVN state.

Ingo

2006-12-28 13:30:47

by Avi Kivity

[permalink] [raw]
Subject: Re: [patch, try#2] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()

Ingo Molnar wrote:
> I've got a security related question as well: vcpu_load() sets up a
> physical CPU's VM registers/state, and vcpu_put() drops that. But
> vcpu_put() only does a put_cpu() call - it does not tear down any VM
> state that has been loaded into the CPU. Is it guaranteed that (hostile)
> user-space cannot use that VM state in any unauthorized way? The state
> is still loaded while arbitrary tasks execute on the CPU. The next
> vcpu_load() will then override it, but the state lingers around forever.
>
> The new x86 VM instructions: vmclear, vmlaunch, vmresume, vmptrld,
> vmread, vmwrite, vmxoff, vmxon are all privileged so i guess it should
> be mostly safe - i'm just wondering whether you thought about this
> attack angle.
>

Yes. Userspace cannot snoop on a VM state.

> ultimately we want to integrate VM state management into the scheduler
> and the context-switch lowlevel arch code, but right now CPU state
> management is done by the KVM 'driver' and there's nothing that isolates
> other tasks from possible side-effects of a loaded VMX/SVN state.
>

AFAICS in vmx root mode the vm state only affects vmx instructions; SVM
has no architecturally hidden state.


--
error compiling committee.c: too many arguments to function

2007-01-01 00:07:26

by Ingo Oeser

[permalink] [raw]
Subject: Re: [PATCH 4/8] KVM: Implement a few system configuration msrs

Hi,

On Thursday, 28. December 2006 11:11, Avi Kivity wrote:
> Index: linux-2.6/drivers/kvm/svm.c
> ===================================================================
> --- linux-2.6.orig/drivers/kvm/svm.c
> +++ linux-2.6/drivers/kvm/svm.c
> @@ -1068,6 +1068,9 @@ static int emulate_on_interception(struc
> static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
> {
> switch (ecx) {
> + case 0xc0010010: /* SYSCFG */
> + case 0xc0010015: /* HWCR */
> + case MSR_IA32_PLATFORM_ID:
> case MSR_IA32_P5_MC_ADDR:
> case MSR_IA32_P5_MC_TYPE:
> case MSR_IA32_MC0_CTL:

What about just defining constants for these?
Then you can rip out these comments.

Same for linux-2.6/drivers/kvm/vmx.c


Regards

Ingo Oeser

2007-01-01 08:20:56

by Avi Kivity

[permalink] [raw]
Subject: Re: [PATCH 4/8] KVM: Implement a few system configuration msrs

Ingo Oeser wrote:
> Hi,
>
> On Thursday, 28. December 2006 11:11, Avi Kivity wrote:
>
>> Index: linux-2.6/drivers/kvm/svm.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/kvm/svm.c
>> +++ linux-2.6/drivers/kvm/svm.c
>> @@ -1068,6 +1068,9 @@ static int emulate_on_interception(struc
>> static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
>> {
>> switch (ecx) {
>> + case 0xc0010010: /* SYSCFG */
>> + case 0xc0010015: /* HWCR */
>> + case MSR_IA32_PLATFORM_ID:
>> case MSR_IA32_P5_MC_ADDR:
>> case MSR_IA32_P5_MC_TYPE:
>> case MSR_IA32_MC0_CTL:
>>
>
> What about just defining constants for these?
> Then you can rip out these comments.
>
> Same for linux-2.6/drivers/kvm/vmx.c
>

Yes, there are a few more of these as well. I'll clean them up once
things quiet down a bit.

--
error compiling committee.c: too many arguments to function