2024-04-19 09:00:01

by Paolo Bonzini

Subject: [PATCH 3/6] KVM: x86/mmu: Extract __kvm_mmu_do_page_fault()

From: Isaku Yamahata <[email protected]>

Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The
inner function is to initialize struct kvm_page_fault and to call the fault
handler, and the outer function handles updating stats and converting
return code. KVM_PRE_FAULT_MEMORY will call the KVM page fault handler.

This patch makes the emulation_type always set irrelevant to the return
code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
and references the value only when RET_PF_EMULATE is returned. Therefore,
this adjustment doesn't affect functionality.
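
For reference, the tail of kvm_mmu_page_fault() (condensed from mmu.c, not
part of this diff) filters out every other return code before it ever reads
emulation_type:

	r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
				  &emulation_type);
	...
	if (r < 0)
		return r;
	if (r != RET_PF_EMULATE)
		return 1;

	/* Reached only for RET_PF_EMULATE, so an unconditionally written
	 * emulation_type is never observed for any other return code.
	 */
	return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type,
				       insn, insn_len);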

No functional change intended.

Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
Message-ID: <ddf1d98420f562707b11e12c416cce8fdb986bb1.1712785629.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/mmu/mmu_internal.h | 38 +++++++++++++++++++++------------
1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index e68a60974cf4..9baae6c223ee 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -287,8 +287,8 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
fault->is_private);
}

-static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u64 err, bool prefetch, int *emulation_type)
+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u64 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
@@ -318,6 +318,27 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
}

+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
+ r = kvm_tdp_page_fault(vcpu, &fault);
+ else
+ r = vcpu->arch.mmu->page_fault(vcpu, &fault);
+
+ if (r == RET_PF_EMULATE && fault.is_private) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
+ r = -EFAULT;
+ }
+
+ if (fault.write_fault_to_shadow_pgtable && emulation_type)
+ *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+
+ return r;
+}
+
+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u64 err, bool prefetch, int *emulation_type)
+{
+ int r;
+
/*
* Async #PF "faults", a.k.a. prefetch faults, are not faults from the
* guest perspective and have already been counted at the time of the
@@ -326,18 +347,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (!prefetch)
vcpu->stat.pf_taken++;

- if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
- r = kvm_tdp_page_fault(vcpu, &fault);
- else
- r = vcpu->arch.mmu->page_fault(vcpu, &fault);
-
- if (r == RET_PF_EMULATE && fault.is_private) {
- kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
- return -EFAULT;
- }
-
- if (fault.write_fault_to_shadow_pgtable && emulation_type)
- *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type);

/*
* Similar to above, prefetch faults aren't truly spurious, and the
--
2.43.0




2024-04-22 08:47:24

by Xiaoyao Li

Subject: Re: [PATCH 3/6] KVM: x86/mmu: Extract __kvm_mmu_do_page_fault()

On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> From: Isaku Yamahata <[email protected]>
>
> Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The
> inner function is to initialize struct kvm_page_fault and to call the fault
> handler, and the outer function handles updating stats and converting
> return code.

I don't see how the outer function converts the return code.

> KVM_PRE_FAULT_MEMORY will call the KVM page fault handler.

I assume it means the inner function will be used by KVM_PRE_FAULT_MEMORY.
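
If so, the pre-fault path would presumably invoke the inner helper directly,
so that none of the outer wrapper's stats accounting fires for faults the
guest never took. A minimal sketch of such a caller (hypothetical; the
function name and error mapping are my guesses, not code from this series):

	/* Hypothetical KVM_PRE_FAULT_MEMORY worker: prefetch=true because
	 * the guest never actually took this fault.
	 */
	static int prefault_one_page(struct kvm_vcpu *vcpu, gpa_t gpa,
				     u64 error_code)
	{
		int r;

		do {
			if (signal_pending(current))
				return -EINTR;
			cond_resched();
			r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code,
						    true, NULL);
		} while (r == RET_PF_RETRY);

		if (r < 0)
			return r;

		return (r == RET_PF_FIXED || r == RET_PF_SPURIOUS) ? 0 : -EIO;
	}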

> This patch makes the emulation_type always set irrelevant to the return
> code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
> and references the value only when RET_PF_EMULATE is returned. Therefore,
> this adjustment doesn't affect functionality.

This paragraph needs to be removed, I think. It's not true.

> No functional change intended.
>
> Suggested-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> Message-ID: <ddf1d98420f562707b11e12c416cce8fdb986bb1.1712785629.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu_internal.h | 38 +++++++++++++++++++++------------
> 1 file changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index e68a60974cf4..9baae6c223ee 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -287,8 +287,8 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
> fault->is_private);
> }
>
> -static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> - u64 err, bool prefetch, int *emulation_type)
> +static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> + u64 err, bool prefetch, int *emulation_type)
> {
> struct kvm_page_fault fault = {
> .addr = cr2_or_gpa,
> @@ -318,6 +318,27 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
> }
>
> + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
> + r = kvm_tdp_page_fault(vcpu, &fault);
> + else
> + r = vcpu->arch.mmu->page_fault(vcpu, &fault);
> +
> + if (r == RET_PF_EMULATE && fault.is_private) {
> + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
> + r = -EFAULT;
> + }
> +
> + if (fault.write_fault_to_shadow_pgtable && emulation_type)
> + *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
> +
> + return r;
> +}
> +
> +static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> + u64 err, bool prefetch, int *emulation_type)
> +{
> + int r;
> +
> /*
> * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
> * guest perspective and have already been counted at the time of the
> @@ -326,18 +347,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> if (!prefetch)
> vcpu->stat.pf_taken++;
>
> - if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
> - r = kvm_tdp_page_fault(vcpu, &fault);
> - else
> - r = vcpu->arch.mmu->page_fault(vcpu, &fault);
> -
> - if (r == RET_PF_EMULATE && fault.is_private) {
> - kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
> - return -EFAULT;
> - }
> -
> - if (fault.write_fault_to_shadow_pgtable && emulation_type)
> - *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
> + r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type);
>
> /*
> * Similar to above, prefetch faults aren't truly spurious, and the


2024-06-12 20:47:56

by Sean Christopherson

Subject: Re: [PATCH 3/6] KVM: x86/mmu: Extract __kvm_mmu_do_page_fault()

On Mon, Apr 22, 2024, Xiaoyao Li wrote:
> On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> > From: Isaku Yamahata <[email protected]>
> >
> > Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The
> > inner function is to initialize struct kvm_page_fault and to call the fault
> > handler, and the outer function handles updating stats and converting
> > return code.
>
> I don't see how the outer function converts return code.
>
> > KVM_PRE_FAULT_MEMORY will call the KVM page fault handler.
>
> I assume it means the inner function will be used by KVM_PRE_FAULT_MEMORY.
>
> > This patch makes the emulation_type always set irrelevant to the return
> > code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
> > and references the value only when RET_PF_EMULATE is returned. Therefore,
> > this adjustment doesn't affect functionality.
>
> This paragraph needs to be removed, I think. It's not true.

It's oddly worded, but I do think it's correct. kvm_arch_async_page_ready()
doesn't pass emulation_type, and kvm_mmu_page_fault() bails early for all other
return values:

	if (r < 0)
		return r;
	if (r != RET_PF_EMULATE)
		return 1;

That said, this belongs in a separate patch (if it's actually necessary).

And _that_ said, rather than add an inner version, what if we instead shuffle the
stats code? pf_taken, pf_spurious, and pf_emulate should _only_ ever be bumped
by kvm_mmu_page_fault(), i.e. should only track page faults that actually happened
in the guest. And so handling them in kvm_mmu_do_page_fault() doesn't make any
sense, because there should only ever be one caller that passes prefetch=false.
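
For context, the "do page fault" call sites in play look roughly like this
(paraphrased; the pre-fault one is from the rest of this series, and its
argument names are approximate):

	/* 1. Real guest faults in kvm_mmu_page_fault() -- the lone
	 *    prefetch=false caller.
	 */
	r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
				  &emulation_type);

	/* 2. Async #PF completion in kvm_arch_async_page_ready(). */
	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code,
			      true, NULL);

	/* 3. Planned KVM_PRE_FAULT_MEMORY path, also prefetch=true. */
	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL);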

Compile tested only, and kvm_mmu_page_fault() is a bit ugly (but that's solvable),
but I think this would provide better separation of concerns.

--
From: Sean Christopherson <[email protected]>
Date: Wed, 12 Jun 2024 12:51:38 -0700
Subject: [PATCH 1/2] KVM: x86/mmu: Bump pf_taken stat only in the "real" page
fault handler

Account stat.pf_taken in kvm_mmu_page_fault(), i.e. the actual page fault
handler, instead of conditionally bumping it in kvm_mmu_do_page_fault().
The "real" page fault handler is the only path that should ever increment
the number of taken page faults, as all other paths that "do page fault"
are by definition not handling faults that occurred in the guest.

Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 2 ++
arch/x86/kvm/mmu/mmu_internal.h | 8 --------
2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f2c9580d9588..3461b8c4aba2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5928,6 +5928,8 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
}

if (r == RET_PF_INVALID) {
+ vcpu->stat.pf_taken++;
+
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
&emulation_type);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index ce2fcd19ba6b..8efd31b3856b 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
}

- /*
- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
- * guest perspective and have already been counted at the time of the
- * original fault.
- */
- if (!prefetch)
- vcpu->stat.pf_taken++;
-
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
r = kvm_tdp_page_fault(vcpu, &fault);
else

base-commit: b7bc82a015e237862837bd1300d6ba1f5cd17466
--
2.45.2.505.gda0bf45e8d-goog

From 1dc69d38a8d51c9d8ad833475938cb925f7ea4cf Mon Sep 17 00:00:00 2001
From: Sean Christopherson <[email protected]>
Date: Wed, 12 Jun 2024 12:59:06 -0700
Subject: [PATCH 2/2] KVM: x86/mmu: Account pf_{fixed,emulate,spurious} in
callers of "do page fault"

Move the accounting of the result of kvm_mmu_do_page_fault() to its
callers, as only pf_fixed is common to guest page faults and async #PFs,
and upcoming support for KVM_PRE_FAULT_MEMORY won't bump _any_ stats.

Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 19 ++++++++++++++++++-
arch/x86/kvm/mmu/mmu_internal.h | 13 -------------
2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3461b8c4aba2..56373577a197 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4291,7 +4291,16 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
return;

- kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
+ r = kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code,
+ true, NULL);
+
+ /*
+ * Account fixed page faults, otherwise they'll never be counted, but
+ * ignore stats for all other return values. Page-ready "faults" aren't
+ * truly spurious and never trigger emulation.
+ */
+ if (r == RET_PF_FIXED)
+ vcpu->stat.pf_fixed++;
}

static inline u8 kvm_max_level_for_order(int order)
@@ -5938,6 +5947,14 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err

if (r < 0)
return r;
+
+ if (r == RET_PF_FIXED)
+ vcpu->stat.pf_fixed++;
+ else if (r == RET_PF_EMULATE)
+ vcpu->stat.pf_emulate++;
+ else if (r == RET_PF_SPURIOUS)
+ vcpu->stat.pf_spurious++;
+
if (r != RET_PF_EMULATE)
return 1;

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 8efd31b3856b..444f55a5eed7 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -337,19 +337,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (fault.write_fault_to_shadow_pgtable && emulation_type)
*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;

- /*
- * Similar to above, prefetch faults aren't truly spurious, and the
- * async #PF path doesn't do emulation. Do count faults that are fixed
- * by the async #PF handler though, otherwise they'll never be counted.
- */
- if (r == RET_PF_FIXED)
- vcpu->stat.pf_fixed++;
- else if (prefetch)
- ;
- else if (r == RET_PF_EMULATE)
- vcpu->stat.pf_emulate++;
- else if (r == RET_PF_SPURIOUS)
- vcpu->stat.pf_spurious++;
return r;
}

--