2017-04-24 15:54:54

by Brijesh Singh

[permalink] [raw]
Subject: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

On AMD hardware when a guest causes a NFP which requires emulation,
the vcpu->arch.gpa_available flag is set to indicate that cr2 contains
a valid GPA.

Currently, emulator_read_write_onepage() makes use of gpa_available flag
to avoid a guest page walk for a known MMIO regions. Lets not limit
the gpa_available optimization to just MMIO region. The patch extends
the check to avoid page walk whenever gpa_available flag is set.

Signed-off-by: Brijesh Singh <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm.c | 4 ++++
arch/x86/kvm/x86.c | 26 +++++++++++++++-----------
3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c..491326d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -677,6 +677,7 @@ struct kvm_vcpu_arch {

/* GPA available (AMD only) */
bool gpa_available;
+ gpa_t gpa_val;
};

struct kvm_lpage_info {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5fba706..8827e4b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4159,6 +4159,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)

vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);

+ /* On #NPF, exit_info_2 contain a valid GPA */
+ if (vcpu->arch.gpa_available)
+ vcpu->arch.gpa_val = svm->vmcb->control.exit_info_2;
+
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ccbd45e..18ec746 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4653,18 +4653,16 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
* occurred.
*/
if (vcpu->arch.gpa_available &&
- emulator_can_use_gpa(ctxt) &&
- vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
- (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
+ emulator_can_use_gpa(ctxt) &&
+ (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
gpa = exception->address;
- goto mmio;
+ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+ } else {
+ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
+ if (ret < 0)
+ return X86EMUL_PROPAGATE_FAULT;
}

- ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
- if (ret < 0)
- return X86EMUL_PROPAGATE_FAULT;
-
/* For APIC access vmexit */
if (ret)
goto mmio;
@@ -5675,8 +5673,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
}

restart:
- /* Save the faulting GPA (cr2) in the address field */
- ctxt->exception.address = cr2;
+ /*
+ * Save the faulting GPA (cr2) in the address field
+ * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
+ */
+ if (vcpu->arch.gpa_available)
+ ctxt->exception.address = vcpu->arch.gpa_val;
+ else
+ ctxt->exception.address = cr2;

r = x86_emulate_insn(ctxt);

--
1.9.1


2017-04-24 16:16:32

by Brijesh Singh

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set



On 04/24/2017 10:52 AM, Brijesh Singh wrote:
> On AMD hardware when a guest causes a NFP which requires emulation,

Just realized a typo in patch description, s/NFP/NPF


> the vcpu->arch.gpa_available flag is set to indicate that cr2 contains
> a valid GPA.
>
> Currently, emulator_read_write_onepage() makes use of gpa_available flag
> to avoid a guest page walk for a known MMIO regions. Lets not limit
> the gpa_available optimization to just MMIO region. The patch extends
> the check to avoid page walk whenever gpa_available flag is set.
>
> Signed-off-by: Brijesh Singh <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm.c | 4 ++++
> arch/x86/kvm/x86.c | 26 +++++++++++++++-----------
> 3 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 74ef58c..491326d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -677,6 +677,7 @@ struct kvm_vcpu_arch {
>
> /* GPA available (AMD only) */
> bool gpa_available;
> + gpa_t gpa_val;
> };
>
> struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 5fba706..8827e4b 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4159,6 +4159,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>
> vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
>
> + /* On #NPF, exit_info_2 contain a valid GPA */
> + if (vcpu->arch.gpa_available)
> + vcpu->arch.gpa_val = svm->vmcb->control.exit_info_2;
> +
> if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
> vcpu->arch.cr0 = svm->vmcb->save.cr0;
> if (npt_enabled)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ccbd45e..18ec746 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4653,18 +4653,16 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> * occurred.
> */
> if (vcpu->arch.gpa_available &&
> - emulator_can_use_gpa(ctxt) &&
> - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
> - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> + emulator_can_use_gpa(ctxt) &&
> + (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> gpa = exception->address;
> - goto mmio;
> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
> + } else {
> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> + if (ret < 0)
> + return X86EMUL_PROPAGATE_FAULT;
> }
>
> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> -
> - if (ret < 0)
> - return X86EMUL_PROPAGATE_FAULT;
> -
> /* For APIC access vmexit */
> if (ret)
> goto mmio;
> @@ -5675,8 +5673,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
> }
>
> restart:
> - /* Save the faulting GPA (cr2) in the address field */
> - ctxt->exception.address = cr2;
> + /*
> + * Save the faulting GPA (cr2) in the address field
> + * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
> + */
> + if (vcpu->arch.gpa_available)
> + ctxt->exception.address = vcpu->arch.gpa_val;
> + else
> + ctxt->exception.address = cr2;
>
> r = x86_emulate_insn(ctxt);
>
>

2017-04-24 20:53:00

by Radim Krčmář

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

2017-04-24 11:52-0400, Brijesh Singh:
> On AMD hardware when a guest causes a NFP which requires emulation,
> the vcpu->arch.gpa_available flag is set to indicate that cr2 contains
> a valid GPA.
>
> Currently, emulator_read_write_onepage() makes use of gpa_available flag
> to avoid a guest page walk for a known MMIO regions. Lets not limit
> the gpa_available optimization to just MMIO region. The patch extends
> the check to avoid page walk whenever gpa_available flag is set.
>
> Signed-off-by: Brijesh Singh <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm.c | 4 ++++
> arch/x86/kvm/x86.c | 26 +++++++++++++++-----------
> 3 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 74ef58c..491326d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -677,6 +677,7 @@ struct kvm_vcpu_arch {
>
> /* GPA available (AMD only) */
> bool gpa_available;
> + gpa_t gpa_val;

Can't we pass this information through function parameters?

(I'd rather avoid intractable variables.)

> };
>
> struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 5fba706..8827e4b 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4159,6 +4159,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>
> vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
>
> + /* On #NPF, exit_info_2 contain a valid GPA */
> + if (vcpu->arch.gpa_available)
> + vcpu->arch.gpa_val = svm->vmcb->control.exit_info_2;

How is vcpu->arch.gpa_val used between here and the NPF handler?

> +
> if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
> vcpu->arch.cr0 = svm->vmcb->save.cr0;
> if (npt_enabled)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -4653,18 +4653,16 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> * occurred.
> */
> if (vcpu->arch.gpa_available &&
> - emulator_can_use_gpa(ctxt) &&
> - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
> - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> + emulator_can_use_gpa(ctxt) &&
> + (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> gpa = exception->address;
> - goto mmio;
> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
> + } else {
> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> + if (ret < 0)
> + return X86EMUL_PROPAGATE_FAULT;
> }
>
> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
> -
> - if (ret < 0)
> - return X86EMUL_PROPAGATE_FAULT;
> -
> /* For APIC access vmexit */
> if (ret)
> goto mmio;
> @@ -5675,8 +5673,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
> }
>
> restart:
> - /* Save the faulting GPA (cr2) in the address field */
> - ctxt->exception.address = cr2;
> + /*
> + * Save the faulting GPA (cr2) in the address field
> + * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
> + */
> + if (vcpu->arch.gpa_available)
> + ctxt->exception.address = vcpu->arch.gpa_val;
> + else
> + ctxt->exception.address = cr2;

And related, shouldn't vcpu->arch.gpa_val be in cr2?

Thanks.

2017-04-24 22:14:35

by Brijesh Singh

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

Hi Radim,


>> /* GPA available (AMD only) */
>> bool gpa_available;
>> + gpa_t gpa_val;
>
> Can't we pass this information through function parameters?
>
> (I'd rather avoid intractable variables.)
>

I also wanted to avoid adding yet another variable but we can't depend on
cr2 parameters passed into x86_emulate_instruction().

The x86_emulate_instruction() function is called from two places:

1) handling the page-fault.
pf_interception [svm.c]
kvm_mmu_page_fault [mmu.c]
x86_emulate_instruction [x86.c]

2) completing the IO/MMIO's from previous instruction decode
kvm_arch_vcpu_ioctl_run
complete_emulated_io
emulate_instruction
x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)

In #1, we are guaranteed that cr2 variable will contain a valid GPA but
in #2, CR2 is set to zero.

>> };
>>
>> struct kvm_lpage_info {
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 5fba706..8827e4b 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -4159,6 +4159,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>>
>> vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
>>
>> + /* On #NPF, exit_info_2 contain a valid GPA */
>> + if (vcpu->arch.gpa_available)
>> + vcpu->arch.gpa_val = svm->vmcb->control.exit_info_2;
>
> How is vcpu->arch.gpa_val used between here and the NPF handler?
>

handle_exit [svm.c]
pf_interception [svm.c]
/* it invokes the fault handler with CR2 = svm->vmcb->control.exit_info_2 */
kvm_mmu_page_fault [mmu.c]
x86_emulate_instruction [x86.c]
emulator_read_write_onepage [x86.c]
/*
*this is where we walk the guest page table to translate
* a GVA to GPA. If gpa_available is set then we use the
* gpa_val instead of walking the pgtable.
*/

>> +
>> if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
>> vcpu->arch.cr0 = svm->vmcb->save.cr0;
>> if (npt_enabled)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> @@ -4653,18 +4653,16 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
>> * occurred.
>> */
>> if (vcpu->arch.gpa_available &&
>> - emulator_can_use_gpa(ctxt) &&
>> - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
>> - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
>> + emulator_can_use_gpa(ctxt) &&
>> + (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
>> gpa = exception->address;
>> - goto mmio;
>> + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
>> + } else {
>> + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>> + if (ret < 0)
>> + return X86EMUL_PROPAGATE_FAULT;
>> }
>>
>> - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>> -
>> - if (ret < 0)
>> - return X86EMUL_PROPAGATE_FAULT;
>> -
>> /* For APIC access vmexit */
>> if (ret)
>> goto mmio;
>> @@ -5675,8 +5673,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>> }
>>
>> restart:
>> - /* Save the faulting GPA (cr2) in the address field */
>> - ctxt->exception.address = cr2;
>> + /*
>> + * Save the faulting GPA (cr2) in the address field
>> + * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
>> + */
>> + if (vcpu->arch.gpa_available)
>> + ctxt->exception.address = vcpu->arch.gpa_val;
>> + else
>> + ctxt->exception.address = cr2;
>
> And related, shouldn't vcpu->arch.gpa_val be in cr2?
>

See my previous comment. In some cases CR2 may be set to zero
(e.g when completing the instruction from previous io/mmio page-fault).

If we are decide to add the gpa_val then we can remove above if
statement from x86_emulate_instruction() and update emulator_read_write_onepage
to use the vcpu->arch.gpa_val instead of exception->address.

if (vcpu->arch.gpa_available &&
emulator_can_use_gpa(ctxt) &&
(addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
gpa = vcpu=>arch.gpa_val;
...
...
}

-Brijesh

2017-04-25 14:04:04

by Radim Krčmář

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

2017-04-24 17:14-0500, Brijesh Singh:
>> > /* GPA available (AMD only) */
>> > bool gpa_available;
>> > + gpa_t gpa_val;
>>
>> Can't we pass this information through function parameters?
>>
>> (I'd rather avoid intractable variables.)
>>
>
> I also wanted to avoid adding yet another variable but we can't depend on
> cr2 parameters passed into x86_emulate_instruction().
>
> The x86_emulate_instruction() function is called from two places:
>
> 1) handling the page-fault.
> pf_interception [svm.c]
> kvm_mmu_page_fault [mmu.c]
> x86_emulate_instruction [x86.c]
>
> 2) completing the IO/MMIO's from previous instruction decode
> kvm_arch_vcpu_ioctl_run
> complete_emulated_io
> emulate_instruction
> x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)
>
> In #1, we are guaranteed that cr2 variable will contain a valid GPA but
> in #2, CR2 is set to zero.

We are setting up the completion in #1 x86_emulate_instruction(), where
the gpa (cr2) is available, so we could store the value while arming
vcpu->arch.complete_userspace_io.

emulator_read_write_onepage() already saves gpa in frag->gpa, which is
then passed into complete_emulated_mmio -- isn't that mechanism
sufficient?

>> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> > @@ -4159,6 +4159,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
>> >
>> > vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
>> >
>> > + /* On #NPF, exit_info_2 contain a valid GPA */
>> > + if (vcpu->arch.gpa_available)
>> > + vcpu->arch.gpa_val = svm->vmcb->control.exit_info_2;
>>
>> How is vcpu->arch.gpa_val used between here and the NPF handler?
>>
>
> handle_exit [svm.c]
> pf_interception [svm.c]
> /* it invokes the fault handler with CR2 = svm->vmcb->control.exit_info_2 */
> kvm_mmu_page_fault [mmu.c]
> x86_emulate_instruction [x86.c]
> emulator_read_write_onepage [x86.c]
> /*
> *this is where we walk the guest page table to translate
> * a GVA to GPA. If gpa_available is set then we use the
> * gpa_val instead of walking the pgtable.
> */

pf_interception is the NPF exit handler -- please move the setting
there, at least. handle_exit() is a hot path that shouldn't contain
code that isn't applicable to all exits.

Btw. we need some other guarantees to declare it as GPA (cr2 is GPA in
NPT exits, but might not be in other) ... isn't arch.mmu.direct_map a
condition we are interested in?

The other code uses it to interpret cr2 directly as gpa, so we might be
able to avoid setting the arch.gpa_available in a hot path too.

>> > @@ -5675,8 +5673,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>> > }
>> >
>> > restart:
>> > - /* Save the faulting GPA (cr2) in the address field */
>> > - ctxt->exception.address = cr2;
>> > + /*
>> > + * Save the faulting GPA (cr2) in the address field
>> > + * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
>> > + */
>> > + if (vcpu->arch.gpa_available)
>> > + ctxt->exception.address = vcpu->arch.gpa_val;
>> > + else
>> > + ctxt->exception.address = cr2;
>>
>> And related, shouldn't vcpu->arch.gpa_val be in cr2?
>>
>
> See my previous comment. In some cases CR2 may be set to zero
> (e.g when completing the instruction from previous io/mmio page-fault).
>
> If we are decide to add the gpa_val then we can remove above if
> statement from x86_emulate_instruction() and update emulator_read_write_onepage
> to use the vcpu->arch.gpa_val instead of exception->address.

Yeah, that would be nicer than setting exception->address at a random
place.

We could also pass the value as cr2 in all cases if it made something
better.

> if (vcpu->arch.gpa_available &&
> emulator_can_use_gpa(ctxt) &&
> (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
> gpa = vcpu=>arch.gpa_val;
> ...
> ...
> }

If at all possible, I'd like to have the gpa passed with other relevant
data, instead of having it isolated like this ... and we can't manage
that, then at least good benchmark results to excuse the bad code.

Thanks.

2017-04-25 22:03:13

by Brijesh Singh

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set


>>
>> I also wanted to avoid adding yet another variable but we can't depend on
>> cr2 parameters passed into x86_emulate_instruction().
>>
>> The x86_emulate_instruction() function is called from two places:
>>
>> 1) handling the page-fault.
>> pf_interception [svm.c]
>> kvm_mmu_page_fault [mmu.c]
>> x86_emulate_instruction [x86.c]
>>
>> 2) completing the IO/MMIO's from previous instruction decode
>> kvm_arch_vcpu_ioctl_run
>> complete_emulated_io
>> emulate_instruction
>> x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)
>>
>> In #1, we are guaranteed that cr2 variable will contain a valid GPA but
>> in #2, CR2 is set to zero.
>
> We are setting up the completion in #1 x86_emulate_instruction(), where
> the gpa (cr2) is available, so we could store the value while arming
> vcpu->arch.complete_userspace_io.
>
> emulator_read_write_onepage() already saves gpa in frag->gpa, which is
> then passed into complete_emulated_mmio -- isn't that mechanism
> sufficient?
>

I see that complete_emulated_mmio() saves the frag>gpa into run->mmio.phys_addr,
so based on the exit_reason we should be able to get the saved gpa.
In my debug patch below, I tried doing something similar to verify that frag->gpa
contains the valid CR2 value but I saw a bunch of mismatch. So it seems like we
may not able to use frag->gpa mechanism. Additionally we also need to handle the
PIO cases (e.g what if we are called from complete_emulated_pio), which also takes
similar code path

complete_emulated_pio
completed_emulated_io
emulate_instruction
x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)

@@ -5682,13 +5686,20 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,

restart:
/*
- * Save the faulting GPA (cr2) in the address field
- * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
+ * if previous exit was due to userspace mmio completion then actual
+ * cr2 is stored in mmio.phys_addr.
*/
- if (vcpu->arch.gpa_available)
- ctxt->exception.address = vcpu->arch.gpa_val;
- else
- ctxt->exception.address = cr2;
+ if (vcpu->run->exit_reason == KVM_EXIT_MMIO) {
+ cr2 = vcpu->run->mmio.phys_addr;
+ if (cr2 != vcpu->arch.gpa_val)
+ pr_err("** mismatch %llx %llx\n",
+ vcpu->run->mmio.phys_addr, vcpu->arch.gpa_val);
+ }
+
+ /* Save the faulting GPA (cr2) in the address field */
+ ctxt->exception.address = cr2;


>>
>> handle_exit [svm.c]
>> pf_interception [svm.c]
>> /* it invokes the fault handler with CR2 = svm->vmcb->control.exit_info_2 */
>> kvm_mmu_page_fault [mmu.c]
>> x86_emulate_instruction [x86.c]
>> emulator_read_write_onepage [x86.c]
>> /*
>> *this is where we walk the guest page table to translate
>> * a GVA to GPA. If gpa_available is set then we use the
>> * gpa_val instead of walking the pgtable.
>> */
>
> pf_interception is the NPF exit handler -- please move the setting
> there, at least. handle_exit() is a hot path that shouldn't contain
> code that isn't applicable to all exits.
>

Sure, Will do.

> Btw. we need some other guarantees to declare it as GPA (cr2 is GPA in
> NPT exits, but might not be in other) ... isn't arch.mmu.direct_map a
> condition we are interested in?
>
> The other code uses it to interpret cr2 directly as gpa, so we might be
> able to avoid setting the arch.gpa_available in a hot path too.
>

Hmm looking at the call trace I am not sure how arch.mmu_direct_map will help
but I will investigate a bit more.

>>
>> See my previous comment. In some cases CR2 may be set to zero
>> (e.g when completing the instruction from previous io/mmio page-fault).
>>
>> If we are decide to add the gpa_val then we can remove above if
>> statement from x86_emulate_instruction() and update emulator_read_write_onepage
>> to use the vcpu->arch.gpa_val instead of exception->address.
>
> Yeah, that would be nicer than setting exception->address at a random
> place.
>
> We could also pass the value as cr2 in all cases if it made something
> better.
>
>> if (vcpu->arch.gpa_available &&
>> emulator_can_use_gpa(ctxt) &&
>> (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
>> gpa = vcpu=>arch.gpa_val;
>> ...
>> ...
>> }
>
> If at all possible, I'd like to have the gpa passed with other relevant
> data, instead of having it isolated like this ... and we can't manage
> that, then at least good benchmark results to excuse the bad code.
>

I ran two tests to see how many times we walk guest page table.

Test1: run kvm-unit-test
Test2: launch Ubuntu 16.06 guest, run a stressapptest for 20 seconds, shutdown the VM

Before patch
* Test1: 10419
* Test2: 243365

After patch:
* Test1: 1259
* Test2: 1221

Please let me know if you want me to run other other benchmark and capture the data.

-Brijesh

2017-04-26 20:44:39

by Radim Krčmář

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

2017-04-25 17:02-0500, Brijesh Singh:
> > > I also wanted to avoid adding yet another variable but we can't depend on
> > > cr2 parameters passed into x86_emulate_instruction().
> > >
> > > The x86_emulate_instruction() function is called from two places:
> > >
> > > 1) handling the page-fault.
> > > pf_interception [svm.c]
> > > kvm_mmu_page_fault [mmu.c]
> > > x86_emulate_instruction [x86.c]
> > >
> > > 2) completing the IO/MMIO's from previous instruction decode
> > > kvm_arch_vcpu_ioctl_run
> > > complete_emulated_io
> > > emulate_instruction
> > > x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)
> > >
> > > In #1, we are guaranteed that cr2 variable will contain a valid GPA but
> > > in #2, CR2 is set to zero.
> >
> > We are setting up the completion in #1 x86_emulate_instruction(), where
> > the gpa (cr2) is available, so we could store the value while arming
> > vcpu->arch.complete_userspace_io.
> >
> > emulator_read_write_onepage() already saves gpa in frag->gpa, which is
> > then passed into complete_emulated_mmio -- isn't that mechanism
> > sufficient?
> >
>
> I see that complete_emulated_mmio() saves the frag>gpa into run->mmio.phys_addr,
> so based on the exit_reason we should be able to get the saved gpa.

I mean, we could pass the frag->gpa into x86_emulate_instruction().
I think that exit_reason is related just accidentally and relying on it
inside x86_emulate_instruction() is bad.

> In my debug patch below, I tried doing something similar to verify that frag->gpa
> contains the valid CR2 value but I saw a bunch of mismatch. So it seems like we
> may not able to use frag->gpa mechanism. Additionally we also need to handle the
> PIO cases (e.g what if we are called from complete_emulated_pio), which also takes
> similar code path

Yes, but PIO should trigger a NPF exit relatively rarely and
generalizing the method from MMIO will be easy.

> complete_emulated_pio
> completed_emulated_io
> emulate_instruction
> x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0)
>
> @@ -5682,13 +5686,20 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
> restart:
> /*
> - * Save the faulting GPA (cr2) in the address field
> - * NOTE: If gpa_available is set then gpa_val will contain a valid GPA
> + * if previous exit was due to userspace mmio completion then actual
> + * cr2 is stored in mmio.phys_addr.
> */
> - if (vcpu->arch.gpa_available)
> - ctxt->exception.address = vcpu->arch.gpa_val;
> - else
> - ctxt->exception.address = cr2;
> + if (vcpu->run->exit_reason == KVM_EXIT_MMIO) {
> + cr2 = vcpu->run->mmio.phys_addr;
> + if (cr2 != vcpu->arch.gpa_val)
> + pr_err("** mismatch %llx %llx\n",
> + vcpu->run->mmio.phys_addr, vcpu->arch.gpa_val);

This will probably return false negatives when then vcpu->arch.gpa_val
couldn't be used anyway (possibly after a VM exits), so it is hard to
draw a conclusion.

I would really like if we had a solution that passed the gpa inside
function parameters.

(Btw. cr2 can also be a virtual address, so we can call it gpa directly)

> > If at all possible, I'd like to have the gpa passed with other relevant
> > data, instead of having it isolated like this ... and we can't manage
> > that, then at least good benchmark results to excuse the bad code.
> >
>
> I ran two tests to see how many times we walk guest page table.
>
> Test1: run kvm-unit-test
> Test2: launch Ubuntu 16.06 guest, run a stressapptest for 20 seconds, shutdown the VM
>
> Before patch
> * Test1: 10419
> * Test2: 243365
>
> After patch:
> * Test1: 1259
> * Test2: 1221
>
> Please let me know if you want me to run other other benchmark and capture the data.

Looks convincing, thanks.

2017-04-28 19:15:54

by Brijesh Singh

[permalink] [raw]
Subject: Re: [PATCH] x86: kvm: Avoid guest page table walk when gpa_available is set

Hi Radim,

>
> This will probably return false negatives when then vcpu->arch.gpa_val
> couldn't be used anyway (possibly after a VM exits), so it is hard to
> draw a conclusion.
>
> I would really like if we had a solution that passed the gpa inside
> function parameters.
>
> (Btw. cr2 can also be a virtual address, so we can call it gpa directly)
>

I've tried the below patch and it seems to work fine. This does not consider
PIO case and as you rightly pointed PIO should trigger #NPF relatively rarely.
At least so far in my runs I have not seen PIO causing #NPF. If this sounds
acceptable approach then I can submit v2 with these changes and remove gpa_val
additional.

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 13c132b..c040e38 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4662,17 +4662,17 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
*/
if (vcpu->arch.gpa_available &&
emulator_can_use_gpa(ctxt) &&
- vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
(addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
gpa = exception->address;
- goto mmio;
+ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+ } else {
+ dump_stack();
+ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
+
+ if (ret < 0)
+ return X86EMUL_PROPAGATE_FAULT;
}

- ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
- if (ret < 0)
- return X86EMUL_PROPAGATE_FAULT;
-
/* For APIC access vmexit */
if (ret)
goto mmio;
@@ -7056,11 +7056,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
return r;
}

-static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu, unsigned long cr2)
{
int r;
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ r = x86_emulate_instruction(vcpu, cr2, EMULTYPE_NO_DECODE, NULL, 0);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE)
return 0;
@@ -7071,7 +7071,7 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu)
{
BUG_ON(!vcpu->arch.pio.count);

- return complete_emulated_io(vcpu);
+ return complete_emulated_io(vcpu, 0);
}
/*
@@ -7097,6 +7097,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
struct kvm_mmio_fragment *frag;
unsigned len;
+ gpa_t gpa;

BUG_ON(!vcpu->mmio_needed);

@@ -7106,6 +7107,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
if (!vcpu->mmio_is_write)
memcpy(frag->data, run->mmio.data, len);

+ /*
+ * lets use the GPA from previous guest page table walk to avoid yet
+ * another guest page table walk when completing the MMIO page-fault.
+ */
+ gpa = frag->gpa;
+
if (frag->len <= 8) {
/* Switch to the next fragment. */
frag++;
@@ -7124,7 +7131,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
if (vcpu->mmio_is_write)
return 1;
vcpu->mmio_read_completed = 1;
- return complete_emulated_io(vcpu);
+ return complete_emulated_io(vcpu, gpa);
}

run->exit_reason = KVM_EXIT_MMIO;