2017-08-17 16:37:06

by Paolo Bonzini

Subject: [PATCH v2 0/3] KVM: MMU: pending MMU and nEPT patches

v2 incorporates all of David's suggestions.

Paolo

Brijesh Singh (1):
KVM: x86: Avoid guest page table walk when gpa_available is set

Paolo Bonzini (3):
KVM: x86: simplify ept_misconfig
KVM: x86: fix use of L1 MMIO areas in nested guests
KVM: x86: fix PML in nested guests

arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/mmu.c | 35 +++++++++++++++++++++++++++++++++--
arch/x86/kvm/mmu.h | 17 -----------------
arch/x86/kvm/paging_tmpl.h | 3 +--
arch/x86/kvm/svm.c | 2 --
arch/x86/kvm/vmx.c | 28 ++++++++++++----------------
arch/x86/kvm/x86.c | 20 +++++++-------------
arch/x86/kvm/x86.h | 6 +++++-
8 files changed, 60 insertions(+), 54 deletions(-)

--
1.8.3.1


2017-08-17 16:37:13

by Paolo Bonzini

Subject: [PATCH 1/3] KVM: x86: simplify ept_misconfig

Calling handle_mmio_page_fault() has been unnecessary since commit
e9ee956e311d ("KVM: x86: MMU: Move handle_mmio_page_fault() call to
kvm_mmu_page_fault()", 2016-02-22).

handle_mmio_page_fault() can now be made static.
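
For context, kvm_mmu_page_fault() already performs this dispatch when the
error code has PFERR_RSVD_MASK set; the following is a condensed sketch
stitched together from the hunks in this series, not verbatim source:

	if (unlikely(error_code & PFERR_RSVD_MASK)) {
		r = handle_mmio_page_fault(vcpu, cr2, direct);
		if (r == RET_MMIO_PF_EMULATE) {
			/* ... emulate the MMIO access ... */
		}
		if (r == RET_MMIO_PF_RETRY)
			return 1;
		if (r < 0)
			return r;
		/* Must be RET_MMIO_PF_INVALID: fall through so the
		   regular fault path can update the mmio spte. */
	}

so handle_ept_misconfig() only has to forward the fault with
PFERR_RSVD_MASK, as the vmx.c hunk below does.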

Signed-off-by: Paolo Bonzini <[email protected]>
---
v1->v2: make the function static.

arch/x86/kvm/mmu.c | 19 ++++++++++++++++++-
arch/x86/kvm/mmu.h | 17 -----------------
arch/x86/kvm/vmx.c | 13 +++----------
3 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e721e10afda1..f7598883920a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3648,7 +3648,23 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
return reserved;
}

-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+/*
+ * Return values of handle_mmio_page_fault:
+ * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+ * directly.
+ * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
+ * fault path update the mmio spte.
+ * RET_MMIO_PF_RETRY: let CPU fault again on the address.
+ * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
+ */
+enum {
+ RET_MMIO_PF_EMULATE = 1,
+ RET_MMIO_PF_INVALID = 2,
+ RET_MMIO_PF_RETRY = 0,
+ RET_MMIO_PF_BUG = -1
+};
+
+static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
u64 spte;
bool reserved;
@@ -4837,6 +4853,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
if (r < 0)
return r;
+ /* Must be RET_MMIO_PF_INVALID. */
}

r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d7d248a000dd..3ed6192d93b1 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,23 +56,6 @@ static inline u64 rsvd_bits(int s, int e)
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

-/*
- * Return values of handle_mmio_page_fault:
- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
- * directly.
- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
- * fault path update the mmio spte.
- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
- */
-enum {
- RET_MMIO_PF_EMULATE = 1,
- RET_MMIO_PF_INVALID = 2,
- RET_MMIO_PF_RETRY = 0,
- RET_MMIO_PF_BUG = -1
-};
-
-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df8d2f127508..45fb0ea78ee8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6410,17 +6410,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

- ret = handle_mmio_page_fault(vcpu, gpa, true);
vcpu->arch.gpa_available = true;
- if (likely(ret == RET_MMIO_PF_EMULATE))
- return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
- EMULATE_DONE;
-
- if (unlikely(ret == RET_MMIO_PF_INVALID))
- return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
-
- if (unlikely(ret == RET_MMIO_PF_RETRY))
- return 1;
+ ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
+ if (ret >= 0)
+ return ret;

/* It is the real ept misconfig */
WARN_ON(1);
--
1.8.3.1


2017-08-17 16:37:11

by Paolo Bonzini

Subject: [PATCH 2/3] KVM: x86: Avoid guest page table walk when gpa_available is set

From: Brijesh Singh <[email protected]>

When a guest causes a page fault which requires emulation, the
vcpu->arch.gpa_available flag is set to indicate that cr2 contains a
valid GPA.

Currently, emulator_read_write_onepage() makes use of the gpa_available
flag to avoid a guest page table walk for known MMIO regions. Let's not
limit the gpa_available optimization to just MMIO regions. This patch
extends the check to avoid a page table walk whenever the gpa_available
flag is set.
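
The resulting lifecycle of the cached GPA, stitched together from the
hunks below (a sketch, not verbatim source):

	/* vcpu_enter_guest(): invalidate before any exit handler runs. */
	vcpu->arch.gpa_available = false;
	r = kvm_x86_ops->handle_exit(vcpu);

	/* kvm_mmu_page_fault(): with TDP, the fault address is a GPA. */
	if (vcpu->arch.mmu.direct_map) {
		vcpu->arch.gpa_available = true;
		vcpu->arch.gpa_val = cr2;
	}

	/* emulator_read_write_onepage(): reuse it, skipping the walk. */
	if (vcpu->arch.gpa_available && emulator_can_use_gpa(ctxt) &&
	    (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK))
		gpa = vcpu->arch.gpa_val;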

Signed-off-by: Brijesh Singh <[email protected]>
[Fix EPT=0 according to Wanpeng Li's fix, plus ensure VMX also uses the
new code. - Paolo]
Signed-off-by: Paolo Bonzini <[email protected]>
---
v1->v2: standardize on "nGPA" moniker, move gpa_available
assignment to x86.c

arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/mmu.c | 6 ++++++
arch/x86/kvm/svm.c | 2 --
arch/x86/kvm/vmx.c | 4 ----
arch/x86/kvm/x86.c | 20 +++++++-------------
5 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e4862e0e978..6db0ed9cf59e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -685,8 +685,9 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;

- /* GPA available (AMD only) */
+ /* GPA available */
bool gpa_available;
+ gpa_t gpa_val;

/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f7598883920a..a2c592b14617 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4843,6 +4843,12 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);

+ /* With shadow page tables, fault_address contains a GVA or nGPA. */
+ if (vcpu->arch.mmu.direct_map) {
+ vcpu->arch.gpa_available = true;
+ vcpu->arch.gpa_val = cr2;
+ }
+
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
if (r == RET_MMIO_PF_EMULATE) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1fa9ee5660f4..a603d0c14a5a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4236,8 +4236,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)

trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);

- vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
-
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 45fb0ea78ee8..e2c8b33c35d1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6393,9 +6393,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
error_code |= (exit_qualification & 0x100) != 0 ?
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;

- vcpu->arch.gpa_available = true;
vcpu->arch.exit_qualification = exit_qualification;
-
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}

@@ -6410,7 +6408,6 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

- vcpu->arch.gpa_available = true;
ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
if (ret >= 0)
return ret;
@@ -8644,7 +8641,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 vectoring_info = vmx->idt_vectoring_info;

trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
- vcpu->arch.gpa_available = false;

/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e10eda86bc7b..3f34d5f3db8d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4657,25 +4657,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
*/
if (vcpu->arch.gpa_available &&
emulator_can_use_gpa(ctxt) &&
- vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
- (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
- gpa = exception->address;
- goto mmio;
+ (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
+ gpa = vcpu->arch.gpa_val;
+ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+ } else {
+ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
}

- ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
if (ret < 0)
return X86EMUL_PROPAGATE_FAULT;
-
- /* For APIC access vmexit */
- if (ret)
- goto mmio;
-
- if (ops->read_write_emulate(vcpu, gpa, val, bytes))
+ if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;

-mmio:
/*
* Is this MMIO handled locally?
*/
@@ -7002,6 +6995,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);

+ vcpu->arch.gpa_available = false;
r = kvm_x86_ops->handle_exit(vcpu);
return r;

--
1.8.3.1


2017-08-17 16:37:44

by Paolo Bonzini

Subject: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests

There is currently some confusion between nested and L1 GPAs. The
assignment to "direct" in kvm_mmu_page_fault tries to fix that, but
it is not enough. What this patch does is fence off the MMIO cache
completely when using shadow nested page tables, since we have neither
a GVA nor an L1 GPA to put in the cache. This also allows some
simplifications in kvm_mmu_page_fault and FNAME(page_fault).

The EPT misconfig likewise does not have an L1 GPA to pass to
kvm_io_bus_write, so that must be skipped for guest mode.
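
As a reminder, the translation chain involved is (illustrative summary,
not part of the patch itself):

	/*
	 * L2 GVA --(L2 page tables)--> nGPA --(L1's nested page
	 * tables, shadowed by KVM)--> L1 GPA --> host physical.
	 *
	 * The MMIO cache stores a GVA (mmio_gva) and an L1 GPA
	 * (mmio_gfn); a fault taken while shadowing L1's nested
	 * page tables hands us an nGPA, which is neither, so the
	 * cache cannot be used in that mode.
	 */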

Signed-off-by: Paolo Bonzini <[email protected]>
---
v1->v2: standardize on "nGPA" moniker, replace nested ifs with &&

arch/x86/kvm/mmu.c | 10 +++++++++-
arch/x86/kvm/paging_tmpl.h | 3 +--
arch/x86/kvm/vmx.c | 7 ++++++-
arch/x86/kvm/x86.h | 6 +++++-
4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a2c592b14617..02f8c507b160 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3596,6 +3596,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)

static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
+ /*
+ * A nested guest cannot use the MMIO cache if it is using nested
+ * page tables, because cr2 is a nGPA while the cache stores L1's
+ * physical addresses.
+ */
+ if (mmu_is_nested(vcpu))
+ return false;
+
if (direct)
return vcpu_match_mmio_gpa(vcpu, addr);

@@ -4841,7 +4849,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
{
int r, emulation_type = EMULTYPE_RETRY;
enum emulation_result er;
- bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
+ bool direct = vcpu->arch.mmu.direct_map;

/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu.direct_map) {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3bb90ceeb52d..86b68dc5a649 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
&map_writable))
return 0;

- if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
- walker.gfn, pfn, walker.pte_access, &r))
+ if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
return r;

/*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2c8b33c35d1..61389ad784e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6402,8 +6402,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
int ret;
gpa_t gpa;

+ /*
+ * A nested guest cannot optimize MMIO vmexits, because we have an
+ * nGPA here instead of the required GPA.
+ */
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+ if (!is_guest_mode(vcpu) &&
+ !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
return kvm_skip_emulated_instruction(vcpu);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 612067074905..113460370a7f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
gva_t gva, gfn_t gfn, unsigned access)
{
- vcpu->arch.mmio_gva = gva & PAGE_MASK;
+ /*
+ * If this is a shadow nested page table, the "GVA" is
+ * actually a nGPA.
+ */
+ vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_gfn = gfn;
vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
--
1.8.3.1

2017-08-18 07:51:33

by David Hildenbrand

Subject: Re: [PATCH 1/3] KVM: x86: simplify ept_misconfig

On 17.08.2017 18:36, Paolo Bonzini wrote:
> Calling handle_mmio_page_fault() has been unnecessary since commit
> e9ee956e311d ("KVM: x86: MMU: Move handle_mmio_page_fault() call to
> kvm_mmu_page_fault()", 2016-02-22).
>
> handle_mmio_page_fault() can now be made static.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> v1->v2: make the function static.
>
> arch/x86/kvm/mmu.c | 19 ++++++++++++++++++-
> arch/x86/kvm/mmu.h | 17 -----------------
> arch/x86/kvm/vmx.c | 13 +++----------
> 3 files changed, 21 insertions(+), 28 deletions(-)

Reviewed-by: David Hildenbrand <[email protected]>


--

Thanks,

David

2017-08-18 07:57:42

by David Hildenbrand

Subject: Re: [PATCH 2/3] KVM: x86: Avoid guest page table walk when gpa_available is set


> +++ b/arch/x86/kvm/x86.c
> @@ -4657,25 +4657,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> */
> if (vcpu->arch.gpa_available &&
> emulator_can_use_gpa(ctxt) &&
> - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
> - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> - gpa = exception->address;
> - goto mmio;
> + (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
> + gpa = vcpu->arch.gpa_val;
> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
> + } else {
> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> }
>
> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> -
> if (ret < 0)
> return X86EMUL_PROPAGATE_FAULT;

just wondering if it makes sense to move this into the else branch (as
it logically only belongs to vcpu_mmio_gva_to_gpa)
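
i.e. something like this (just a sketch, assuming vcpu_is_mmio_gpa()
only returns 0 or 1):

	} else {
		ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
		if (ret < 0)
			return X86EMUL_PROPAGATE_FAULT;
	}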

Reviewed-by: David Hildenbrand <[email protected]>

--

Thanks,

David

2017-08-18 08:00:03

by David Hildenbrand

Subject: Re: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests

On 17.08.2017 18:36, Paolo Bonzini wrote:
> There is currently some confusion between nested and L1 GPAs. The
> assignment to "direct" in kvm_mmu_page_fault tries to fix that, but
> it is not enough. What this patch does is fence off the MMIO cache
> completely when using shadow nested page tables, since we have neither
> a GVA nor an L1 GPA to put in the cache. This also allows some
> simplifications in kvm_mmu_page_fault and FNAME(page_fault).
>
> The EPT misconfig likewise does not have an L1 GPA to pass to
> kvm_io_bus_write, so that must be skipped for guest mode.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> v1->v2: standardize on "nGPA" moniker, replace nested ifs with &&
>
> arch/x86/kvm/mmu.c | 10 +++++++++-
> arch/x86/kvm/paging_tmpl.h | 3 +--
> arch/x86/kvm/vmx.c | 7 ++++++-
> arch/x86/kvm/x86.h | 6 +++++-
> 4 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a2c592b14617..02f8c507b160 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3596,6 +3596,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
>
> static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> {
> + /*
> + * A nested guest cannot use the MMIO cache if it is using nested
> + * page tables, because cr2 is a nGPA while the cache stores L1's
> + * physical addresses.

... "while the cache stores GPAs" ?

> + */
> + if (mmu_is_nested(vcpu))
> + return false;
> +
> if (direct)
> return vcpu_match_mmio_gpa(vcpu, addr);
>
> @@ -4841,7 +4849,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
> {
> int r, emulation_type = EMULTYPE_RETRY;
> enum emulation_result er;
> - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
> + bool direct = vcpu->arch.mmu.direct_map;
>
> /* With shadow page tables, fault_address contains a GVA or nGPA. */
> if (vcpu->arch.mmu.direct_map) {
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 3bb90ceeb52d..86b68dc5a649 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
> &map_writable))
> return 0;
>
> - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
> - walker.gfn, pfn, walker.pte_access, &r))
> + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
> return r;
>
> /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e2c8b33c35d1..61389ad784e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6402,8 +6402,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
> int ret;
> gpa_t gpa;
>
> + /*
> + * A nested guest cannot optimize MMIO vmexits, because we have an
> + * nGPA here instead of the required GPA.
> + */
> gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
> + if (!is_guest_mode(vcpu) &&
> + !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
> trace_kvm_fast_mmio(gpa);
> return kvm_skip_emulated_instruction(vcpu);
> }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 612067074905..113460370a7f 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
> static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> gva_t gva, gfn_t gfn, unsigned access)
> {
> - vcpu->arch.mmio_gva = gva & PAGE_MASK;
> + /*
> + * If this is a shadow nested page table, the "GVA" is

s/"GVA"/GVA/ ?

> + * actually a nGPA.
> + */
> + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
> vcpu->arch.access = access;
> vcpu->arch.mmio_gfn = gfn;
> vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
>

Reviewed-by: David Hildenbrand <[email protected]>

--

Thanks,

David

2017-08-18 12:35:57

by Radim Krčmář

Subject: Re: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests

2017-08-18 09:59+0200, David Hildenbrand:
> On 17.08.2017 18:36, Paolo Bonzini wrote:
> > There is currently some confusion between nested and L1 GPAs. The
> > assignment to "direct" in kvm_mmu_page_fault tries to fix that, but
> > it is not enough. What this patch does is fence off the MMIO cache
> > completely when using shadow nested page tables, since we have neither
> > a GVA nor an L1 GPA to put in the cache. This also allows some
> > simplifications in kvm_mmu_page_fault and FNAME(page_fault).
> >
> > The EPT misconfig likewise does not have an L1 GPA to pass to
> > kvm_io_bus_write, so that must be skipped for guest mode.
> >
> > Signed-off-by: Paolo Bonzini <[email protected]>
> > ---
> > v1->v2: standardize on "nGPA" moniker, replace nested ifs with &&
> >
> > arch/x86/kvm/mmu.c | 10 +++++++++-
> > arch/x86/kvm/paging_tmpl.h | 3 +--
> > arch/x86/kvm/vmx.c | 7 ++++++-
> > arch/x86/kvm/x86.h | 6 +++++-
> > 4 files changed, 21 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index a2c592b14617..02f8c507b160 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -3596,6 +3596,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
> >
> > static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> > {
> > + /*
> > + * A nested guest cannot use the MMIO cache if it is using nested
> > + * page tables, because cr2 is a nGPA while the cache stores L1's
> > + * physical addresses.
>
> ... "while the cache stores GPAs" ?

Makes sense, changed while applying.

> > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > @@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
> > static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> > gva_t gva, gfn_t gfn, unsigned access)
> > {
> > - vcpu->arch.mmio_gva = gva & PAGE_MASK;
> > + /*
> > + * If this is a shadow nested page table, the "GVA" is
>
> s/"GVA"/GVA/ ?

I prefer the former; we're talking about a "gva_t gva" that isn't a GVA. :)

2017-08-18 12:36:59

by Radim Krčmář

Subject: Re: [PATCH 2/3] KVM: x86: Avoid guest page table walk when gpa_available is set

2017-08-18 09:57+0200, David Hildenbrand:
>
> > +++ b/arch/x86/kvm/x86.c
> > @@ -4657,25 +4657,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> > */
> > if (vcpu->arch.gpa_available &&
> > emulator_can_use_gpa(ctxt) &&
> > - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
> > - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> > - gpa = exception->address;
> > - goto mmio;
> > + (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
> > + gpa = vcpu->arch.gpa_val;
> > + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
> > + } else {
> > + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> > }
> >
> > - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> > -
> > if (ret < 0)
> > return X86EMUL_PROPAGATE_FAULT;
>
> just wondering if it makes sense to move this into the else branch (as
> it logically only belongs to vcpu_mmio_gva_to_gpa)

It does, I took the liberty of changing that.

> Reviewed-by: David Hildenbrand <[email protected]>

Thanks.

2017-08-18 12:38:02

by Paolo Bonzini

Subject: Re: [PATCH 2/3] KVM: x86: Avoid guest page table walk when gpa_available is set

On 18/08/2017 14:36, Radim Krčmář wrote:
>>> + gpa = vcpu->arch.gpa_val;
>>> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
>>> + } else {
>>> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>>> }
>>>
>>> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>>> -
>>> if (ret < 0)
>>> return X86EMUL_PROPAGATE_FAULT;
>> just wondering if it makes sense to move this into the else branch (as
>> it logically only belongs to vcpu_mmio_gva_to_gpa)
>
> It does, I took the liberty to change that.

It was on purpose, but it's okay either way. :)

Paolo

2017-08-18 12:38:36

by Paolo Bonzini

Subject: Re: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests

On 18/08/2017 14:35, Radim Krčmář wrote:
>
>>> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>>> @@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
>>> static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>>> gva_t gva, gfn_t gfn, unsigned access)
>>> {
>>> - vcpu->arch.mmio_gva = gva & PAGE_MASK;
>>> + /*
>>> + * If this is a shadow nested page table, the "GVA" is
>> s/"GVA"/GVA/ ?
> I prefer the former, we're talking about "gva_t gva" that isn't GVA. :)

Exactly. :)

Paolo

2017-08-18 12:40:53

by Radim Krčmář

Subject: Re: [PATCH 2/3] KVM: x86: Avoid guest page table walk when gpa_available is set

2017-08-18 14:37+0200, Paolo Bonzini:
> On 18/08/2017 14:36, Radim Krčmář wrote:
> >>> + gpa = vcpu->arch.gpa_val;
> >>> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
> >>> + } else {
> >>> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> >>> }
> >>>
> >>> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> >>> -
> >>> if (ret < 0)
> >>> return X86EMUL_PROPAGATE_FAULT;
> >> just wondering if it makes sense to move this into the else branch (as
> >> it logically only belongs to vcpu_mmio_gva_to_gpa)
> >
> > It does, I took the liberty to change that.
>
> It was on purpose, but it's okay either way. :)

Oh, sorry, I was thinking that vcpu_is_mmio_gpa() should return bool. :)

2019-02-05 20:19:09

by Jim Mattson

Subject: Re: [PATCH 3/3] KVM: x86: fix use of L1 MMIO areas in nested guests

On Thu, Aug 17, 2017 at 9:37 AM Paolo Bonzini <[email protected]> wrote:
>
> There is currently some confusion between nested and L1 GPAs. The
> assignment to "direct" in kvm_mmu_page_fault tries to fix that, but
> it is not enough. What this patch does is fence off the MMIO cache
> completely when using shadow nested page tables, since we have neither
> a GVA nor an L1 GPA to put in the cache. This also allows some
> simplifications in kvm_mmu_page_fault and FNAME(page_fault).
>
> The EPT misconfig likewise does not have an L1 GPA to pass to
> kvm_io_bus_write, so that must be skipped for guest mode.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> v1->v2: standardize on "nGPA" moniker, replace nested ifs with &&
>
> arch/x86/kvm/mmu.c | 10 +++++++++-
> arch/x86/kvm/paging_tmpl.h | 3 +--
> arch/x86/kvm/vmx.c | 7 ++++++-
> arch/x86/kvm/x86.h | 6 +++++-
> 4 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a2c592b14617..02f8c507b160 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3596,6 +3596,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
>
> static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> {
> + /*
> + * A nested guest cannot use the MMIO cache if it is using nested
> + * page tables, because cr2 is a nGPA while the cache stores L1's
> + * physical addresses.
> + */
> + if (mmu_is_nested(vcpu))
> + return false;
> +
> if (direct)
> return vcpu_match_mmio_gpa(vcpu, addr);
>
> @@ -4841,7 +4849,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
> {
> int r, emulation_type = EMULTYPE_RETRY;
> enum emulation_result er;
> - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
> + bool direct = vcpu->arch.mmu.direct_map;
>
> /* With shadow page tables, fault_address contains a GVA or nGPA. */
> if (vcpu->arch.mmu.direct_map) {
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 3bb90ceeb52d..86b68dc5a649 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
> &map_writable))
> return 0;
>
> - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
> - walker.gfn, pfn, walker.pte_access, &r))
> + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
> return r;
>
> /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e2c8b33c35d1..61389ad784e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6402,8 +6402,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
> int ret;
> gpa_t gpa;
>
> + /*
> + * A nested guest cannot optimize MMIO vmexits, because we have an
> + * nGPA here instead of the required GPA.
> + */
> gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
> + if (!is_guest_mode(vcpu) &&
> + !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
> trace_kvm_fast_mmio(gpa);
> return kvm_skip_emulated_instruction(vcpu);
> }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 612067074905..113460370a7f 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -90,7 +90,11 @@ static inline u32 bit(int bitno)
> static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> gva_t gva, gfn_t gfn, unsigned access)
> {
> - vcpu->arch.mmio_gva = gva & PAGE_MASK;
> + /*
> + * If this is a shadow nested page table, the "GVA" is
> + * actually a nGPA.
> + */
> + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
> vcpu->arch.access = access;
> vcpu->arch.mmio_gfn = gfn;
> vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
> --
> 1.8.3.1

Should this patch be considered for the stable branches?