Reference: https://bugzilla.kernel.org/show_bug.cgi?id=54521
The vmxon region is unused by nVMX, but adding these checks is
probably harmless and may help detect buggy L1 hypervisors in the
future!
Bandan Das (3):
KVM: nVMX: rearrange get_vmx_mem_address
KVM: nVMX: additional checks on vmxon region
KVM: nVMX: fail on invalid vmclear/vmptrld pointer
arch/x86/kvm/vmx.c | 171 ++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 118 insertions(+), 53 deletions(-)
--
1.8.3.1
handle_vmon will call this function in the next patch to get the
vmxon region pointer.
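For context, the call site added in the next patch will look roughly
like this (a sketch, not the final hunk):

	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
		return 1;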
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/vmx.c | 106 ++++++++++++++++++++++++++---------------------------
1 file changed, 53 insertions(+), 53 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f68c58..c18fe9a4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5775,6 +5775,59 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
}
/*
+ * Decode the memory-address operand of a vmx instruction, as recorded on an
+ * exit caused by such an instruction (run by a guest hypervisor).
+ * On success, returns 0. When the operand is invalid, returns 1 and throws
+ * #UD or #GP.
+ */
+static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
+ unsigned long exit_qualification,
+ u32 vmx_instruction_info, gva_t *ret)
+{
+ /*
+ * According to Vol. 3B, "Information for VM Exits Due to Instruction
+ * Execution", on an exit, vmx_instruction_info holds most of the
+ * addressing components of the operand. Only the displacement part
+ * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
+ * For how an actual address is calculated from all these components,
+ * refer to Vol. 1, "Operand Addressing".
+ */
+ int scaling = vmx_instruction_info & 3;
+ int addr_size = (vmx_instruction_info >> 7) & 7;
+ bool is_reg = vmx_instruction_info & (1u << 10);
+ int seg_reg = (vmx_instruction_info >> 15) & 7;
+ int index_reg = (vmx_instruction_info >> 18) & 0xf;
+ bool index_is_valid = !(vmx_instruction_info & (1u << 22));
+ int base_reg = (vmx_instruction_info >> 23) & 0xf;
+ bool base_is_valid = !(vmx_instruction_info & (1u << 27));
+
+ if (is_reg) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ /* Addr = segment_base + offset */
+ /* offset = base + [index * scale] + displacement */
+ *ret = vmx_get_segment_base(vcpu, seg_reg);
+ if (base_is_valid)
+ *ret += kvm_register_read(vcpu, base_reg);
+ if (index_is_valid)
+ *ret += kvm_register_read(vcpu, index_reg)<<scaling;
+ *ret += exit_qualification; /* holds the displacement */
+
+ if (addr_size == 1) /* 32 bit */
+ *ret &= 0xffffffff;
+
+ /*
+ * TODO: throw #GP (and return 1) in various cases that the VM*
+ * instructions require it - e.g., offset beyond segment limit,
+ * unusable or unreadable/unwritable segment, non-canonical 64-bit
+ * address, and so on. Currently these are not checked.
+ */
+ return 0;
+}
+
+/*
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
* inspect the argument to VMXON (the so-called "VMXON pointer") because we
@@ -5934,59 +5987,6 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
return 1;
}
-/*
- * Decode the memory-address operand of a vmx instruction, as recorded on an
- * exit caused by such an instruction (run by a guest hypervisor).
- * On success, returns 0. When the operand is invalid, returns 1 and throws
- * #UD or #GP.
- */
-static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
- unsigned long exit_qualification,
- u32 vmx_instruction_info, gva_t *ret)
-{
- /*
- * According to Vol. 3B, "Information for VM Exits Due to Instruction
- * Execution", on an exit, vmx_instruction_info holds most of the
- * addressing components of the operand. Only the displacement part
- * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
- * For how an actual address is calculated from all these components,
- * refer to Vol. 1, "Operand Addressing".
- */
- int scaling = vmx_instruction_info & 3;
- int addr_size = (vmx_instruction_info >> 7) & 7;
- bool is_reg = vmx_instruction_info & (1u << 10);
- int seg_reg = (vmx_instruction_info >> 15) & 7;
- int index_reg = (vmx_instruction_info >> 18) & 0xf;
- bool index_is_valid = !(vmx_instruction_info & (1u << 22));
- int base_reg = (vmx_instruction_info >> 23) & 0xf;
- bool base_is_valid = !(vmx_instruction_info & (1u << 27));
-
- if (is_reg) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
- /* Addr = segment_base + offset */
- /* offset = base + [index * scale] + displacement */
- *ret = vmx_get_segment_base(vcpu, seg_reg);
- if (base_is_valid)
- *ret += kvm_register_read(vcpu, base_reg);
- if (index_is_valid)
- *ret += kvm_register_read(vcpu, index_reg)<<scaling;
- *ret += exit_qualification; /* holds the displacement */
-
- if (addr_size == 1) /* 32 bit */
- *ret &= 0xffffffff;
-
- /*
- * TODO: throw #GP (and return 1) in various cases that the VM*
- * instructions require it - e.g., offset beyond segment limit,
- * unusable or unreadable/unwritable segment, non-canonical 64-bit
- * address, and so on. Currently these are not checked.
- */
- return 0;
-}
-
/* Emulate the VMCLEAR instruction */
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
--
1.8.3.1
The spec mandates that if the VMPTRLD or VMCLEAR address is equal
to the VMXON region pointer, the instruction should fail with the
error "VMPTRLD with VMXON pointer" or "VMCLEAR with VMXON pointer".
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/vmx.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d5342c7..8864fa1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6069,6 +6069,12 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return 1;
}
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
if (vmptr == vmx->nested.current_vmptr) {
nested_release_vmcs12(vmx);
vmx->nested.current_vmptr = -1ull;
@@ -6412,6 +6418,12 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
}
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
--
1.8.3.1
Currently, the vmxon region isn't used in the nested case.
However, according to the spec, the vmxon instruction performs
additional sanity checks on this region and the associated
pointer. Modify emulated vmxon to better adhere to the spec
requirements
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c18fe9a4..d5342c7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -354,6 +354,7 @@ struct vmcs02_list {
struct nested_vmx {
/* Has the level1 guest done vmxon? */
bool vmxon;
+ gpa_t vmxon_ptr;
/* The guest-physical address of the current VMCS L1 keeps for L2 */
gpa_t current_vmptr;
@@ -5840,9 +5841,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs *shadow_vmcs;
+ gva_t gva;
+ gpa_t vmptr;
+ struct x86_exception e;
+ struct page *page;
+
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ /* Guest physical Address Width */
+ struct kvm_cpuid_entry2 *best =
+ kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+
+
/* The Intel VMX Instruction Reference lists a bunch of bits that
* are prerequisite to running VMXON, most notably cr4.VMXE must be
* set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
@@ -5865,6 +5876,46 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
kvm_inject_gp(vcpu, 0);
return 1;
}
+
+ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+ return 1;
+
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
+ sizeof(vmptr), &e)) {
+ kvm_inject_page_fault(vcpu, &e);
+ return 1;
+ }
+
+ /* Don't bailout if best is NULL */
+ WARN_ON(!best);
+
+ /*
+ * SDM 3: 24.11.5
+ * VMXON pointer must be 4KB aligned
+ * VMXON pointer must not set any bits beyond processor's
+ * physical address width
+ * The first 4 bytes of VMXON region contain the supported
+ * VMCS revision identifier
+ *
+ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+ * which replaces physical address width with 32
+ */
+ if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (best &&
+ (vmptr >> (best->eax & 0xff)))) {
+ nested_vmx_failInvalid(vcpu);
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
+ page = nested_get_page(vcpu, vmptr);
+ if ((page == NULL) || ((*(u32 *)kmap(page) != VMCS12_REVISION))) {
+ nested_vmx_failInvalid(vcpu);
+ kunmap(page);
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
skip_emulated_instruction(vcpu);
@@ -5896,9 +5947,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
vmx->nested.vmxon = true;
+ vmx->nested.vmxon_ptr = vmptr;
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
+ kunmap(page);
return 1;
}
--
1.8.3.1
On 2014-04-28 07:00, Bandan Das wrote:
> Currently, the vmxon region isn't used in the nested case.
> However, according to the spec, the vmxon instruction performs
> additional sanity checks on this region and the associated
> pointer. Modify emulated vmxon to better adhere to the spec
> requirements
>
> Signed-off-by: Bandan Das <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c18fe9a4..d5342c7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -354,6 +354,7 @@ struct vmcs02_list {
> struct nested_vmx {
> /* Has the level1 guest done vmxon? */
> bool vmxon;
> + gpa_t vmxon_ptr;
>
> /* The guest-physical address of the current VMCS L1 keeps for L2 */
> gpa_t current_vmptr;
> @@ -5840,9 +5841,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
> struct kvm_segment cs;
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct vmcs *shadow_vmcs;
> + gva_t gva;
> + gpa_t vmptr;
> + struct x86_exception e;
> + struct page *page;
> +
> const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
> | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
>
> + /* Guest physical Address Width */
> + struct kvm_cpuid_entry2 *best =
> + kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
We have cpuid_maxphyaddr().
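For illustration, with cpuid_maxphyaddr() the address-width check
could shrink to something along these lines (untested sketch):

	if (!IS_ALIGNED(vmptr, PAGE_SIZE) ||
	    (vmptr >> cpuid_maxphyaddr(vcpu))) {
		nested_vmx_failInvalid(vcpu);
		skip_emulated_instruction(vcpu);
		return 1;
	}

and the WARN_ON/NULL handling of "best" goes away.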
> +
> +
> /* The Intel VMX Instruction Reference lists a bunch of bits that
> * are prerequisite to running VMXON, most notably cr4.VMXE must be
> * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
> @@ -5865,6 +5876,46 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
> kvm_inject_gp(vcpu, 0);
> return 1;
> }
> +
> + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
> + vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
> + return 1;
> +
> + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
> + sizeof(vmptr), &e)) {
> + kvm_inject_page_fault(vcpu, &e);
> + return 1;
> + }
> +
> + /* Don't bailout if best is NULL */
> + WARN_ON(!best);
> +
> + /*
> + * SDM 3: 24.11.5
> + * VMXON pointer must be 4KB aligned
> + * VMXON pointer must not set any bits beyond processor's
> + * physical address width
> + * The first 4 bytes of VMXON region contain the supported
> + * VMCS revision identifier
> + *
> + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
> + * which replaces physical address width with 32
> + */
> + if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (best &&
> + (vmptr >> (best->eax & 0xff)))) {
> + nested_vmx_failInvalid(vcpu);
> + skip_emulated_instruction(vcpu);
> + return 1;
> + }
> +
> + page = nested_get_page(vcpu, vmptr);
> + if ((page == NULL) || ((*(u32 *)kmap(page) != VMCS12_REVISION))) {
Style: you don't need braces around the comparisons.
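I.e., just for the style nit:

	if (page == NULL || *(u32 *)kmap(page) != VMCS12_REVISION) {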
> + nested_vmx_failInvalid(vcpu);
> + kunmap(page);
> + skip_emulated_instruction(vcpu);
> + return 1;
> + }
> +
> if (vmx->nested.vmxon) {
> nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
> skip_emulated_instruction(vcpu);
> @@ -5896,9 +5947,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
> vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
>
> vmx->nested.vmxon = true;
> + vmx->nested.vmxon_ptr = vmptr;
>
> skip_emulated_instruction(vcpu);
> nested_vmx_succeed(vcpu);
> + kunmap(page);
This late unmapping leaks the page in other error cases.
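One way to avoid that, as a rough sketch, is to drop the mapping as
soon as the revision id has been checked instead of at the very end
of handle_vmon:

	page = nested_get_page(vcpu, vmptr);
	if (page == NULL) {
		nested_vmx_failInvalid(vcpu);
		skip_emulated_instruction(vcpu);
		return 1;
	}
	if (*(u32 *)kmap(page) != VMCS12_REVISION) {
		kunmap(page);
		nested_vmx_failInvalid(vcpu);
		skip_emulated_instruction(vcpu);
		return 1;
	}
	kunmap(page);	/* nothing below needs the mapping any more */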
Jan
> return 1;
> }
>
>
--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux
On 2014-04-28 07:00, Bandan Das wrote:
> Reference: https://bugzilla.kernel.org/show_bug.cgi?id=54521
>
> The vmxon region is unused by nVMX, but adding these checks is
> probably harmless and may help detect buggy L1 hypervisors in the
> future!
Nice and welcome! Will you provide unit tests for these cases as well?
Jan
>
> Bandan Das (3):
> KVM: nVMX: rearrange get_vmx_mem_address
> KVM: nVMX: additional checks on vmxon region
> KVM: nVMX: fail on invalid vmclear/vmptrld pointer
>
> arch/x86/kvm/vmx.c | 171 ++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 118 insertions(+), 53 deletions(-)
>
--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux
Jan Kiszka <[email protected]> writes:
> On 2014-04-28 07:00, Bandan Das wrote:
>> Reference: https://bugzilla.kernel.org/show_bug.cgi?id=54521
>>
>> The vmxon region is unused by nVMX, but adding these checks is
>> probably harmless and may help detect buggy L1 hypervisors in the
>> future!
>
> Nice and welcome! Will you provide unit tests for these cases as well?
Yeah, this and test cases for ACK_INTR_ON_EXIT are on my todo list :)
I will follow up soon.
> Jan
>
>>
>> Bandan Das (3):
>> KVM: nVMX: rearrange get_vmx_mem_address
>> KVM: nVMX: additional checks on vmxon region
>> KVM: nVMX: fail on invalid vmclear/vmptrld pointer
>>
>> arch/x86/kvm/vmx.c | 171 ++++++++++++++++++++++++++++++++++++-----------------
>> 1 file changed, 118 insertions(+), 53 deletions(-)
>>
Jan Kiszka <[email protected]> writes:
> On 2014-04-28 07:00, Bandan Das wrote:
>> Currently, the vmxon region isn't used in the nested case.
>> However, according to the spec, the vmxon instruction performs
>> additional sanity checks on this region and the associated
>> pointer. Modify emulated vmxon to better adhere to the spec
>> requirements
>>
>> Signed-off-by: Bandan Das <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 53 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index c18fe9a4..d5342c7 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -354,6 +354,7 @@ struct vmcs02_list {
>> struct nested_vmx {
>> /* Has the level1 guest done vmxon? */
>> bool vmxon;
>> + gpa_t vmxon_ptr;
>>
>> /* The guest-physical address of the current VMCS L1 keeps for L2 */
>> gpa_t current_vmptr;
>> @@ -5840,9 +5841,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>> struct kvm_segment cs;
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> struct vmcs *shadow_vmcs;
>> + gva_t gva;
>> + gpa_t vmptr;
>> + struct x86_exception e;
>> + struct page *page;
>> +
>> const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
>> | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
>>
>> + /* Guest physical Address Width */
>> + struct kvm_cpuid_entry2 *best =
>> + kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
>
> We have cpuid_maxphyaddr().
>
>> +
>> +
>> /* The Intel VMX Instruction Reference lists a bunch of bits that
>> * are prerequisite to running VMXON, most notably cr4.VMXE must be
>> * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
>> @@ -5865,6 +5876,46 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>> kvm_inject_gp(vcpu, 0);
>> return 1;
>> }
>> +
>> + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
>> + vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
>> + return 1;
>> +
>> + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
>> + sizeof(vmptr), &e)) {
>> + kvm_inject_page_fault(vcpu, &e);
>> + return 1;
>> + }
>> +
>> + /* Don't bailout if best is NULL */
>> + WARN_ON(!best);
>> +
>> + /*
>> + * SDM 3: 24.11.5
>> + * VMXON pointer must be 4KB aligned
>> + * VMXON pointer must not set any bits beyond processor's
>> + * physical address width
>> + * The first 4 bytes of VMXON region contain the supported
>> + * VMCS revision identifier
>> + *
>> + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
>> + * which replaces physical address width with 32
>> + */
>> + if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (best &&
>> + (vmptr >> (best->eax & 0xff)))) {
>> + nested_vmx_failInvalid(vcpu);
>> + skip_emulated_instruction(vcpu);
>> + return 1;
>> + }
>> +
>> + page = nested_get_page(vcpu, vmptr);
>> + if ((page == NULL) || ((*(u32 *)kmap(page) != VMCS12_REVISION))) {
>
> Style: you don't need braces around the comparisons.
>
>> + nested_vmx_failInvalid(vcpu);
>> + kunmap(page);
>> + skip_emulated_instruction(vcpu);
>> + return 1;
>> + }
>> +
>> if (vmx->nested.vmxon) {
>> nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
>> skip_emulated_instruction(vcpu);
>> @@ -5896,9 +5947,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>> vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
>>
>> vmx->nested.vmxon = true;
>> + vmx->nested.vmxon_ptr = vmptr;
>>
>> skip_emulated_instruction(vcpu);
>> nested_vmx_succeed(vcpu);
>> + kunmap(page);
>
> This late unmapping leaks the page in other error cases.
Oops, sorry!
> Jan
>
>> return 1;
>> }
>>
>>