2022-04-16 01:57:41

by Sean Christopherson

[permalink] [raw]
Subject: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"

Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
kvm_faultin_pfn() to signal that the page fault handler should continue
doing its thing. Aside from being gross and inefficient, using a boolean
return to signal continue vs. stop makes it extremely difficult to add
more helpers and/or move existing code to a helper.

E.g. hypothetically, if nested MMUs were to gain a separate page fault
handler in the future, everything up to the "is self-modifying PTE" check
can be shared by all shadow MMUs, but communicating up the stack whether
to continue on or stop becomes a nightmare.

More concretely, proposed support for private guest memory ran into a
similar issue, where it'll be forced to forego a helper in order to yield
sane code: https://lore.kernel.org/all/YkJbxiL%[email protected].

No functional change intended.

Cc: David Matlack <[email protected]>
Cc: Chao Peng <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 51 ++++++++++++++-------------------
arch/x86/kvm/mmu/mmu_internal.h | 9 +++++-
arch/x86/kvm/mmu/paging_tmpl.h | 6 ++--
3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 69a30d6d1e2b..cb2982c6b513 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return -EFAULT;
}

-static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
- unsigned int access, int *ret_val)
+static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+ unsigned int access)
{
/* The pfn is invalid, report the error! */
- if (unlikely(is_error_pfn(fault->pfn))) {
- *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
- return true;
- }
+ if (unlikely(is_error_pfn(fault->pfn)))
+ return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);

if (unlikely(!fault->slot)) {
gva_t gva = fault->is_tdp ? 0 : fault->addr;
@@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
* touching the shadow page tables as attempting to install an
* MMIO SPTE will just be an expensive nop.
*/
- if (unlikely(!shadow_mmio_value)) {
- *ret_val = RET_PF_EMULATE;
- return true;
- }
+ if (unlikely(!shadow_mmio_value))
+ return RET_PF_EMULATE;
}

- return false;
+ return RET_PF_CONTINUE;
}

static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
@@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}

-static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
+static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
struct kvm_memory_slot *slot = fault->slot;
bool async;
@@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
* be zapped before KVM inserts a new MMIO SPTE for the gfn.
*/
if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
- goto out_retry;
+ return RET_PF_RETRY;

if (!kvm_is_visible_memslot(slot)) {
/* Don't expose private memslots to L2. */
@@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
fault->slot = NULL;
fault->pfn = KVM_PFN_NOSLOT;
fault->map_writable = false;
- return false;
+ return RET_PF_CONTINUE;
}
/*
* If the APIC access page exists but is disabled, go directly
@@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
* when the AVIC is re-enabled.
*/
if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
- !kvm_apicv_activated(vcpu->kvm)) {
- *r = RET_PF_EMULATE;
- return true;
- }
+ !kvm_apicv_activated(vcpu->kvm))
+ return RET_PF_EMULATE;
}

async = false;
@@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
fault->write, &fault->map_writable,
&fault->hva);
if (!async)
- return false; /* *pfn has correct page already */
+ return RET_PF_CONTINUE; /* *pfn has correct page already */

if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(fault->addr, fault->gfn);
if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- goto out_retry;
- } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
- goto out_retry;
+ return RET_PF_RETRY;
+ } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
+ return RET_PF_RETRY;
+ }
}

fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
fault->write, &fault->map_writable,
&fault->hva);
- return false;
-
-out_retry:
- *r = RET_PF_RETRY;
- return true;
+ return RET_PF_CONTINUE;
}

/*
@@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

- if (kvm_faultin_pfn(vcpu, fault, &r))
+ r = kvm_faultin_pfn(vcpu, fault);
+ if (r != RET_PF_CONTINUE)
return r;

- if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
+ r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
+ if (r != RET_PF_CONTINUE)
return r;

r = RET_PF_RETRY;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 1bff453f7cbe..c0e502b17ef7 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
/*
* Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
*
+ * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
* RET_PF_RETRY: let CPU fault again on the address.
* RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
* RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
@@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
*
* Any names added to this enum should be exported to userspace for use in
* tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
+ *
+ * Note, all values must be greater than or equal to zero so as not to encroach
+ * on -errno return values. Somewhat arbitrarily use '0' for CONTINUE, which
+ * will allow for efficient machine code when checking for CONTINUE, e.g.
+ * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
*/
enum {
- RET_PF_RETRY = 0,
+ RET_PF_CONTINUE = 0,
+ RET_PF_RETRY,
RET_PF_EMULATE,
RET_PF_INVALID,
RET_PF_FIXED,
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 66f1acf153c4..7038273d04ab 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

- if (kvm_faultin_pfn(vcpu, fault, &r))
+ r = kvm_faultin_pfn(vcpu, fault);
+ if (r != RET_PF_CONTINUE)
return r;

- if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
+ r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
+ if (r != RET_PF_CONTINUE)
return r;

/*

base-commit: 150866cd0ec871c765181d145aa0912628289c8a
--
2.36.0.rc0.470.gd361397f0d-goog


2022-04-16 02:21:32

by David Matlack

[permalink] [raw]
Subject: Re: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"

On Thu, Apr 14, 2022 at 5:51 PM Sean Christopherson <[email protected]> wrote:
>
> Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
> kvm_faultin_pfn() to signal that the page fault handler should continue
> doing its thing. Aside from being gross and inefficient, using a boolean
> return to signal continue vs. stop makes it extremely difficult to add
> more helpers and/or move existing code to a helper.
>
> E.g. hypothetically, if nested MMUs were to gain a separate page fault
> handler in the future, everything up to the "is self-modifying PTE" check
> can be shared by all shadow MMUs, but communicating up the stack whether
> to continue on or stop becomes a nightmare.
>
> More concretely, proposed support for private guest memory ran into a
> similar issue, where it'll be forced to forego a helper in order to yield
> sane code: https://lore.kernel.org/all/YkJbxiL%[email protected].
>
> No functional change intended.
>
> Cc: David Matlack <[email protected]>
> Cc: Chao Peng <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu.c | 51 ++++++++++++++-------------------
> arch/x86/kvm/mmu/mmu_internal.h | 9 +++++-
> arch/x86/kvm/mmu/paging_tmpl.h | 6 ++--
> 3 files changed, 34 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 69a30d6d1e2b..cb2982c6b513 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
> return -EFAULT;
> }
>
> -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> - unsigned int access, int *ret_val)
> +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> + unsigned int access)
> {
> /* The pfn is invalid, report the error! */
> - if (unlikely(is_error_pfn(fault->pfn))) {
> - *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
> - return true;
> - }
> + if (unlikely(is_error_pfn(fault->pfn)))
> + return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
>
> if (unlikely(!fault->slot)) {
> gva_t gva = fault->is_tdp ? 0 : fault->addr;
> @@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
> * touching the shadow page tables as attempting to install an
> * MMIO SPTE will just be an expensive nop.
> */
> - if (unlikely(!shadow_mmio_value)) {
> - *ret_val = RET_PF_EMULATE;
> - return true;
> - }
> + if (unlikely(!shadow_mmio_value))
> + return RET_PF_EMULATE;
> }
>
> - return false;
> + return RET_PF_CONTINUE;
> }
>
> static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
> @@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
> }
>
> -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
> +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
> struct kvm_memory_slot *slot = fault->slot;
> bool async;
> @@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> * be zapped before KVM inserts a new MMIO SPTE for the gfn.
> */
> if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
> - goto out_retry;
> + return RET_PF_RETRY;
>
> if (!kvm_is_visible_memslot(slot)) {
> /* Don't expose private memslots to L2. */
> @@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> fault->slot = NULL;
> fault->pfn = KVM_PFN_NOSLOT;
> fault->map_writable = false;
> - return false;
> + return RET_PF_CONTINUE;
> }
> /*
> * If the APIC access page exists but is disabled, go directly
> @@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> * when the AVIC is re-enabled.
> */
> if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
> - !kvm_apicv_activated(vcpu->kvm)) {
> - *r = RET_PF_EMULATE;
> - return true;
> - }
> + !kvm_apicv_activated(vcpu->kvm))
> + return RET_PF_EMULATE;
> }
>
> async = false;
> @@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> fault->write, &fault->map_writable,
> &fault->hva);
> if (!async)
> - return false; /* *pfn has correct page already */
> + return RET_PF_CONTINUE; /* *pfn has correct page already */
>
> if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
> trace_kvm_try_async_get_page(fault->addr, fault->gfn);
> if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
> trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
> kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> - goto out_retry;
> - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
> - goto out_retry;
> + return RET_PF_RETRY;
> + } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> + return RET_PF_RETRY;
> + }
> }
>
> fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
> fault->write, &fault->map_writable,
> &fault->hva);
> - return false;
> -
> -out_retry:
> - *r = RET_PF_RETRY;
> - return true;
> + return RET_PF_CONTINUE;
> }
>
> /*
> @@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> mmu_seq = vcpu->kvm->mmu_notifier_seq;
> smp_rmb();
>
> - if (kvm_faultin_pfn(vcpu, fault, &r))
> + r = kvm_faultin_pfn(vcpu, fault);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> - if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
> + r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> r = RET_PF_RETRY;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 1bff453f7cbe..c0e502b17ef7 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> /*
> * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
> *
> + * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
> * RET_PF_RETRY: let CPU fault again on the address.
> * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
> * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.

RET_PF_CONTINUE and RET_PF_INVALID are pretty similar, they both
indicate to the PF handler that it should keep going. What do you
think about taking this a step further and removing RET_PF_INVALID and
RET_PF_CONTINUE and using 0 instead? That way we can replace:

if (ret != RET_PF_CONTINUE)
return r;

and

if (ret != RET_PF_INVALID)
return r;

with

if (r)
return r;

?

> @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> *
> * Any names added to this enum should be exported to userspace for use in
> * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h

Please add RET_PF_CONTINUE to mmutrace.h, e.g.

TRACE_DEFINE_ENUM(RET_PF_CONTINUE);

It doesn't matter in practice (yet) since the trace enums are only
used for trace_fast_page_fault, which does not return RET_PF_CONTINUE.
But it would be good to keep it up to date.

> + *
> + * Note, all values must be greater than or equal to zero so as not to encroach
> + * on -errno return values. Somewhat arbitrarily use '0' for CONTINUE, which
> + * will allow for efficient machine code when checking for CONTINUE, e.g.
> + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
> */
> enum {
> - RET_PF_RETRY = 0,
> + RET_PF_CONTINUE = 0,
> + RET_PF_RETRY,
> RET_PF_EMULATE,
> RET_PF_INVALID,
> RET_PF_FIXED,
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 66f1acf153c4..7038273d04ab 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> mmu_seq = vcpu->kvm->mmu_notifier_seq;
> smp_rmb();
>
> - if (kvm_faultin_pfn(vcpu, fault, &r))
> + r = kvm_faultin_pfn(vcpu, fault);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> - if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
> + r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> /*
>
> base-commit: 150866cd0ec871c765181d145aa0912628289c8a
> --
> 2.36.0.rc0.470.gd361397f0d-goog
>

2022-04-18 20:56:08

by Chao Peng

[permalink] [raw]
Subject: Re: [PATCH] KVM: x86/mmu: Add RET_PF_CONTINUE to eliminate bool+int* "returns"

On Fri, Apr 15, 2022 at 12:51:07AM +0000, Sean Christopherson wrote:
> Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and
> kvm_faultin_pfn() to signal that the page fault handler should continue
> doing its thing. Aside from being gross and inefficient, using a boolean
> return to signal continue vs. stop makes it extremely difficult to add
> more helpers and/or move existing code to a helper.
>
> E.g. hypothetically, if nested MMUs were to gain a separate page fault
> handler in the future, everything up to the "is self-modifying PTE" check
> can be shared by all shadow MMUs, but communicating up the stack whether
> to continue on or stop becomes a nightmare.
>
> More concretely, proposed support for private guest memory ran into a
> similar issue, where it'll be forced to forego a helper in order to yield
> sane code: https://lore.kernel.org/all/YkJbxiL%[email protected].

Thanks for cooking this patch, it makes private memory patch much
easier.

>
> No functional change intended.
>
> Cc: David Matlack <[email protected]>
> Cc: Chao Peng <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu.c | 51 ++++++++++++++-------------------
> arch/x86/kvm/mmu/mmu_internal.h | 9 +++++-
> arch/x86/kvm/mmu/paging_tmpl.h | 6 ++--
> 3 files changed, 34 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 69a30d6d1e2b..cb2982c6b513 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
> return -EFAULT;
> }
>
> -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> - unsigned int access, int *ret_val)
> +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> + unsigned int access)
> {
> /* The pfn is invalid, report the error! */
> - if (unlikely(is_error_pfn(fault->pfn))) {
> - *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
> - return true;
> - }
> + if (unlikely(is_error_pfn(fault->pfn)))
> + return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
>
> if (unlikely(!fault->slot)) {
> gva_t gva = fault->is_tdp ? 0 : fault->addr;
> @@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
> * touching the shadow page tables as attempting to install an
> * MMIO SPTE will just be an expensive nop.
> */
> - if (unlikely(!shadow_mmio_value)) {
> - *ret_val = RET_PF_EMULATE;
> - return true;
> - }
> + if (unlikely(!shadow_mmio_value))
> + return RET_PF_EMULATE;
> }
>
> - return false;
> + return RET_PF_CONTINUE;
> }
>
> static bool page_fault_can_be_fast(struct kvm_page_fault *fault)
> @@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
> }
>
> -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
> +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
> struct kvm_memory_slot *slot = fault->slot;
> bool async;
> @@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> * be zapped before KVM inserts a new MMIO SPTE for the gfn.
> */
> if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
> - goto out_retry;
> + return RET_PF_RETRY;
>
> if (!kvm_is_visible_memslot(slot)) {
> /* Don't expose private memslots to L2. */
> @@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> fault->slot = NULL;
> fault->pfn = KVM_PFN_NOSLOT;
> fault->map_writable = false;
> - return false;
> + return RET_PF_CONTINUE;
> }
> /*
> * If the APIC access page exists but is disabled, go directly
> @@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> * when the AVIC is re-enabled.
> */
> if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
> - !kvm_apicv_activated(vcpu->kvm)) {
> - *r = RET_PF_EMULATE;
> - return true;
> - }
> + !kvm_apicv_activated(vcpu->kvm))
> + return RET_PF_EMULATE;
> }
>
> async = false;
> @@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> fault->write, &fault->map_writable,
> &fault->hva);
> if (!async)
> - return false; /* *pfn has correct page already */
> + return RET_PF_CONTINUE; /* *pfn has correct page already */
>
> if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
> trace_kvm_try_async_get_page(fault->addr, fault->gfn);
> if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
> trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
> kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> - goto out_retry;
> - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn))
> - goto out_retry;
> + return RET_PF_RETRY;
> + } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> + return RET_PF_RETRY;
> + }
> }
>
> fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
> fault->write, &fault->map_writable,
> &fault->hva);
> - return false;
> -
> -out_retry:
> - *r = RET_PF_RETRY;
> - return true;
> + return RET_PF_CONTINUE;
> }
>
> /*
> @@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> mmu_seq = vcpu->kvm->mmu_notifier_seq;
> smp_rmb();
>
> - if (kvm_faultin_pfn(vcpu, fault, &r))
> + r = kvm_faultin_pfn(vcpu, fault);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> - if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
> + r = handle_abnormal_pfn(vcpu, fault, ACC_ALL);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> r = RET_PF_RETRY;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 1bff453f7cbe..c0e502b17ef7 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> /*
> * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
> *
> + * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
> * RET_PF_RETRY: let CPU fault again on the address.
> * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
> * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
> @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> *
> * Any names added to this enum should be exported to userspace for use in
> * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
> + *
> + * Note, all values must be greater than or equal to zero so as not to encroach
> + * on -errno return values. Somewhat arbitrarily use '0' for CONTINUE, which
> + * will allow for efficient machine code when checking for CONTINUE, e.g.
> + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero.
> */
> enum {
> - RET_PF_RETRY = 0,
> + RET_PF_CONTINUE = 0,
> + RET_PF_RETRY,
> RET_PF_EMULATE,
> RET_PF_INVALID,
> RET_PF_FIXED,

Unrelated to this patch but related to private memory patch, when
implicit conversion happens, I'm thinking which return value I should
use. Current -1 does not make much sense since it causes KVM_RUN
returns an error while it's expected to be 0. None of the above
including RET_PF_CONTINUE seems appropriate. Maybe we should go further
to introduce another RET_PF_USER indicating we should exit to
userspace for handling with a return value of 0 instead of -error in
kvm_mmu_page_fault().

Thanks,
Chao
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 66f1acf153c4..7038273d04ab 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> mmu_seq = vcpu->kvm->mmu_notifier_seq;
> smp_rmb();
>
> - if (kvm_faultin_pfn(vcpu, fault, &r))
> + r = kvm_faultin_pfn(vcpu, fault);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> - if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
> + r = handle_abnormal_pfn(vcpu, fault, walker.pte_access);
> + if (r != RET_PF_CONTINUE)
> return r;
>
> /*
>
> base-commit: 150866cd0ec871c765181d145aa0912628289c8a
> --
> 2.36.0.rc0.470.gd361397f0d-goog