On Sun, Jun 11, 2023 at 11:25:12PM -0500,
Michael Roth <[email protected]> wrote:
> This will be used to determine whether or not an #NPF should be serviced
> using a normal page vs. a guarded/gmem one.
>
> Signed-off-by: Michael Roth <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 7 +++++++
> arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
> 2 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b3bd24f2a390..c26f76641121 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1445,6 +1445,13 @@ struct kvm_arch {
> */
> #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
> struct kvm_mmu_memory_cache split_desc_cache;
> +
> + /*
> + * When set, used to determine whether a fault should be treated as
> + * private in the context of protected VMs which use a separate gmem
> + * pool to back private guest pages.
> + */
> + u64 mmu_private_fault_mask;
> };
>
> struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 780b91e1da9f..9b9e75aa43f4 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -252,6 +252,39 @@ struct kvm_page_fault {
>
> int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
>
> +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> +{
> + struct kvm_memory_slot *slot;
> + bool private_fault = false;
> + gfn_t gfn = gpa_to_gfn(gpa);
> +
> + slot = gfn_to_memslot(kvm, gfn);
> + if (!slot) {
> + pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> + goto out;
> + }
> +
> + if (!kvm_slot_can_be_private(slot)) {
> + pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> + goto out;
> + }
> +
> + if (kvm->arch.mmu_private_fault_mask) {
> + private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> + goto out;
> + }
What's the convention of err? Can we abstract it by introducing a new bit
PFERR_PRIVATE_MASK? The caller sets it based on arch specific value.
the logic will be
.is_private = err & PFERR_PRIVATE_MASK;
> +
> + /*
> + * Handling below is for UPM self-tests and guests that treat userspace
> + * as the authority on whether a fault should be private or not.
> + */
> + private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
This code path is sad. One extra slot lookup and xarray look up.
Without mmu lock, the result can change by other vcpu.
Let's find a better way.
> +
> +out:
> + pr_debug("%s: GFN: 0x%llx, private: %d\n", __func__, gfn, private_fault);
> + return private_fault;
> +}
> +
> /*
> * Return values of handle_mmio_page_fault(), mmu.page_fault(), fast_page_fault(),
> * and of course kvm_mmu_do_page_fault().
> @@ -301,7 +334,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> .max_level = KVM_MAX_HUGEPAGE_LEVEL,
> .req_level = PG_LEVEL_4K,
> .goal_level = PG_LEVEL_4K,
> - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> + .is_private = kvm_mmu_fault_is_private(vcpu->kvm, cr2_or_gpa, err),
> };
> int r;
>
> --
> 2.25.1
>
--
Isaku Yamahata <[email protected]>
On Wed, Jun 14, 2023 at 09:47:09AM -0700, Isaku Yamahata wrote:
> On Sun, Jun 11, 2023 at 11:25:12PM -0500,
> Michael Roth <[email protected]> wrote:
>
> > This will be used to determine whether or not an #NPF should be serviced
> > using a normal page vs. a guarded/gmem one.
> >
> > Signed-off-by: Michael Roth <[email protected]>
> > ---
> > arch/x86/include/asm/kvm_host.h | 7 +++++++
> > arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
> > 2 files changed, 41 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index b3bd24f2a390..c26f76641121 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1445,6 +1445,13 @@ struct kvm_arch {
> > */
> > #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
> > struct kvm_mmu_memory_cache split_desc_cache;
> > +
> > + /*
> > + * When set, used to determine whether a fault should be treated as
> > + * private in the context of protected VMs which use a separate gmem
> > + * pool to back private guest pages.
> > + */
> > + u64 mmu_private_fault_mask;
> > };
> >
> > struct kvm_vm_stat {
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index 780b91e1da9f..9b9e75aa43f4 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -252,6 +252,39 @@ struct kvm_page_fault {
> >
> > int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
> >
> > +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> > +{
> > + struct kvm_memory_slot *slot;
> > + bool private_fault = false;
> > + gfn_t gfn = gpa_to_gfn(gpa);
> > +
> > + slot = gfn_to_memslot(kvm, gfn);
> > + if (!slot) {
> > + pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> > + goto out;
> > + }
> > +
> > + if (!kvm_slot_can_be_private(slot)) {
> > + pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> > + goto out;
> > + }
> > +
> > + if (kvm->arch.mmu_private_fault_mask) {
> > + private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> > + goto out;
> > + }
>
> What's the convention of err? Can we abstract it by introducing a new bit
> PFERR_PRIVATE_MASK? The caller sets it based on arch specific value.
> the logic will be
> .is_private = err & PFERR_PRIVATE_MASK;
I'm not sure I understand the question. 'err' is just the page fault flags,
and arch.mmu_private_fault_mask is something that can be set on a
per-platform basis when running in a mode where shared/private access
is recorded in the page fault flags during a #NPF.
I'm not sure how we'd keep the handling cross-platform by moving to a macro,
since TDX uses a different bit, and we'd want to be able to build a
SNP+TDX kernel that could run on either type of hardware.
Are you suggesting to reverse that and have err be set in a platform-specific
way and then use a common PFERR_PRIVATE_MASK that's software-defined and
consistent across platforms? That could work, but existing handling seems
to use page fault flags as-is, keeping the hardware-set values, rather than
modifying them to pass additional metadata, so it seems like it might
make things more confusing to make an exception to that here. Or are
there other cases where it's done that way?
>
>
> > +
> > + /*
> > + * Handling below is for UPM self-tests and guests that treat userspace
> > + * as the authority on whether a fault should be private or not.
> > + */
> > + private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
>
> This code path is sad. One extra slot lookup and xarray look up.
> Without mmu lock, the result can change by other vcpu.
> Let's find a better way.
The intention was to rely on fault->mmu_seq to determine if a
KVM_SET_MEMORY_ATTRIBUTES update came in after .private_fault was set so
that fault handling could be retried, but that doesn't happen until
kvm_faultin_pfn() which is *after* this is logged. So yes, I think there
is a race here, and the approach you took in your Misc. series of
keeping the kvm_mem_is_private() check inside kvm_faultin_pfn() is more
efficient/correct.
If we can figure out a way to handle checking the fault flags in a way
that works for both TDX/SNP (and KVM self-test use-case) we can
consolidate around that.
-Mike
>
> > +
> > +out:
> > + pr_debug("%s: GFN: 0x%llx, private: %d\n", __func__, gfn, private_fault);
> > + return private_fault;
> > +}
> > +
> > /*
> > * Return values of handle_mmio_page_fault(), mmu.page_fault(), fast_page_fault(),
> > * and of course kvm_mmu_do_page_fault().
> > @@ -301,7 +334,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> > .max_level = KVM_MAX_HUGEPAGE_LEVEL,
> > .req_level = PG_LEVEL_4K,
> > .goal_level = PG_LEVEL_4K,
> > - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> > + .is_private = kvm_mmu_fault_is_private(vcpu->kvm, cr2_or_gpa, err),
> > };
> > int r;
> >
> > --
> > 2.25.1
> >
>
> --
> Isaku Yamahata <[email protected]>
On Tue, Jun 20, 2023 at 03:28:41PM -0500,
Michael Roth <[email protected]> wrote:
> On Wed, Jun 14, 2023 at 09:47:09AM -0700, Isaku Yamahata wrote:
> > On Sun, Jun 11, 2023 at 11:25:12PM -0500,
> > Michael Roth <[email protected]> wrote:
> >
> > > This will be used to determine whether or not an #NPF should be serviced
> > > using a normal page vs. a guarded/gmem one.
> > >
> > > Signed-off-by: Michael Roth <[email protected]>
> > > ---
> > > arch/x86/include/asm/kvm_host.h | 7 +++++++
> > > arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
> > > 2 files changed, 41 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index b3bd24f2a390..c26f76641121 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -1445,6 +1445,13 @@ struct kvm_arch {
> > > */
> > > #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
> > > struct kvm_mmu_memory_cache split_desc_cache;
> > > +
> > > + /*
> > > + * When set, used to determine whether a fault should be treated as
> > > + * private in the context of protected VMs which use a separate gmem
> > > + * pool to back private guest pages.
> > > + */
> > > + u64 mmu_private_fault_mask;
> > > };
> > >
> > > struct kvm_vm_stat {
> > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > index 780b91e1da9f..9b9e75aa43f4 100644
> > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > @@ -252,6 +252,39 @@ struct kvm_page_fault {
> > >
> > > int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
> > >
> > > +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> > > +{
> > > + struct kvm_memory_slot *slot;
> > > + bool private_fault = false;
> > > + gfn_t gfn = gpa_to_gfn(gpa);
> > > +
> > > + slot = gfn_to_memslot(kvm, gfn);
> > > + if (!slot) {
> > > + pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> > > + goto out;
> > > + }
> > > +
> > > + if (!kvm_slot_can_be_private(slot)) {
> > > + pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> > > + goto out;
> > > + }
> > > +
> > > + if (kvm->arch.mmu_private_fault_mask) {
> > > + private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> > > + goto out;
> > > + }
> >
> > What's the convention of err? Can we abstract it by introducing a new bit
> > PFERR_PRIVATE_MASK? The caller sets it based on arch specific value.
> > the logic will be
> > .is_private = err & PFERR_PRIVATE_MASK;
>
> I'm not sure I understand the question. 'err' is just the page fault flags,
> and arch.mmu_private_fault_mask is something that can be set on a
> per-platform basis when running in a mode where shared/private access
> is recorded in the page fault flags during a #NPF.
>
> I'm not sure how we'd keep the handling cross-platform by moving to a macro,
> since TDX uses a different bit, and we'd want to be able to build a
> SNP+TDX kernel that could run on either type of hardware.
>
> Are you suggesting to reverse that and have err be set in a platform-specific
> way and then use a common PFERR_PRIVATE_MASK that's software-defined and
> consistent across platforms? That could work, but existing handling seems
> to use page fault flags as-is, keeping the hardware-set values, rather than
> modifying them to pass additional metadata, so it seems like it might
> make things more confusing to make an exception to that here. Or are
> there other cases where it's done that way?
I meant the latter, making PFERR_PRIVATE_MASK common software-defined.
I think the SVM fault handler can use hardware value directly by carefully
defining those PFERR values.
TDX doesn't have architectural bit in error code to indicate the private fault.
It's coded in faulted address as shared bit. GPA bit 51 or 47.
PFERR_{USER, WRITE, FETCH, PRESENT} are already software-defined value for VMX
(and TDX). The fault handler for VMX, handle_ept_violation(), converts
encoding. For TDX, PFERR_PRIVATE_MASK is just one more software defined bit.
I'm fine with either way, variable or macro. Which do you prefer?
- Define variable mmu_private_fault_mask (global or per struct kvm)
The module initialization code, hardware_setup(), sets mmu_private_fault_mask.
- Define the software defined value, PFERR_PRIVATE_MASK.
The caller of kvm_mmu_page_fault() parses the hardware value and construct
software defined error_code.
- any other?
> > > +
> > > + /*
> > > + * Handling below is for UPM self-tests and guests that treat userspace
> > > + * as the authority on whether a fault should be private or not.
> > > + */
> > > + private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
> >
> > This code path is sad. One extra slot lookup and xarray look up.
> > Without mmu lock, the result can change by other vcpu.
> > Let's find a better way.
>
> The intention was to rely on fault->mmu_seq to determine if a
> KVM_SET_MEMORY_ATTRIBUTES update came in after .private_fault was set so
> that fault handling could be retried, but that doesn't happen until
> kvm_faultin_pfn() which is *after* this is logged. So yes, I think there
> is a race here, and the approach you took in your Misc. series of
> keeping the kvm_mem_is_private() check inside kvm_faultin_pfn() is more
> efficient/correct.
>
> If we can figure out a way to handle checking the fault flags in a way
> that works for both TDX/SNP (and KVM self-test use-case) we can
> consolidate around that.
I can think of the following ways. I think the second option is better because
we don't need exit bit for error code.
- Introduce software defined error code
- Add a flags to struct kvm_arch for self-test use-case VM_TYPE_PROTECTED_VM.
Set it to true for VM_TYPE_PROTECTED_VM case.
- any other?
Thanks,
--
Isaku Yamahata <[email protected]>
On Tue, Jun 20, 2023 at 02:18:45PM -0700, Isaku Yamahata wrote:
> On Tue, Jun 20, 2023 at 03:28:41PM -0500,
> Michael Roth <[email protected]> wrote:
>
> > On Wed, Jun 14, 2023 at 09:47:09AM -0700, Isaku Yamahata wrote:
> > > On Sun, Jun 11, 2023 at 11:25:12PM -0500,
> > > Michael Roth <[email protected]> wrote:
> > >
> > > > This will be used to determine whether or not an #NPF should be serviced
> > > > using a normal page vs. a guarded/gmem one.
> > > >
> > > > Signed-off-by: Michael Roth <[email protected]>
> > > > ---
> > > > arch/x86/include/asm/kvm_host.h | 7 +++++++
> > > > arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
> > > > 2 files changed, 41 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > index b3bd24f2a390..c26f76641121 100644
> > > > --- a/arch/x86/include/asm/kvm_host.h
> > > > +++ b/arch/x86/include/asm/kvm_host.h
> > > > @@ -1445,6 +1445,13 @@ struct kvm_arch {
> > > > */
> > > > #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
> > > > struct kvm_mmu_memory_cache split_desc_cache;
> > > > +
> > > > + /*
> > > > + * When set, used to determine whether a fault should be treated as
> > > > + * private in the context of protected VMs which use a separate gmem
> > > > + * pool to back private guest pages.
> > > > + */
> > > > + u64 mmu_private_fault_mask;
> > > > };
> > > >
> > > > struct kvm_vm_stat {
> > > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > > index 780b91e1da9f..9b9e75aa43f4 100644
> > > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > > @@ -252,6 +252,39 @@ struct kvm_page_fault {
> > > >
> > > > int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
> > > >
> > > > +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> > > > +{
> > > > + struct kvm_memory_slot *slot;
> > > > + bool private_fault = false;
> > > > + gfn_t gfn = gpa_to_gfn(gpa);
> > > > +
> > > > + slot = gfn_to_memslot(kvm, gfn);
> > > > + if (!slot) {
> > > > + pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> > > > + goto out;
> > > > + }
> > > > +
> > > > + if (!kvm_slot_can_be_private(slot)) {
> > > > + pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> > > > + goto out;
> > > > + }
> > > > +
> > > > + if (kvm->arch.mmu_private_fault_mask) {
> > > > + private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> > > > + goto out;
> > > > + }
> > >
> > > What's the convention of err? Can we abstract it by introducing a new bit
> > > PFERR_PRIVATE_MASK? The caller sets it based on arch specific value.
> > > the logic will be
> > > .is_private = err & PFERR_PRIVATE_MASK;
> >
> > I'm not sure I understand the question. 'err' is just the page fault flags,
> > and arch.mmu_private_fault_mask is something that can be set on a
> > per-platform basis when running in a mode where shared/private access
> > is recorded in the page fault flags during a #NPF.
> >
> > I'm not sure how we'd keep the handling cross-platform by moving to a macro,
> > since TDX uses a different bit, and we'd want to be able to build a
> > SNP+TDX kernel that could run on either type of hardware.
> >
> > Are you suggesting to reverse that and have err be set in a platform-specific
> > way and then use a common PFERR_PRIVATE_MASK that's software-defined and
> > consistent across platforms? That could work, but existing handling seems
> > to use page fault flags as-is, keeping the hardware-set values, rather than
> > modifying them to pass additional metadata, so it seems like it might
> > make things more confusing to make an exception to that here. Or are
> > there other cases where it's done that way?
>
> I meant the latter, making PFERR_PRIVATE_MASK common software-defined.
>
> I think the SVM fault handler can use hardware value directly by carefully
> defining those PFERR values.
>
> TDX doesn't have architectural bit in error code to indicate the private fault.
> It's coded in faulted address as shared bit. GPA bit 51 or 47.
> PFERR_{USER, WRITE, FETCH, PRESENT} are already software-defined value for VMX
> (and TDX). The fault handler for VMX, handle_ept_violation(), converts
> encoding. For TDX, PFERR_PRIVATE_MASK is just one more software defined bit.
>
> I'm fine with either way, variable or macro. Which do you prefer?
>
> - Define variable mmu_private_fault_mask (global or per struct kvm)
> The module initialization code, hardware_setup(), sets mmu_private_fault_mask.
> - Define the software defined value, PFERR_PRIVATE_MASK.
> The caller of kvm_mmu_page_fault() parses the hardware value and construct
> software defined error_code.
> - any other?
>
>
> > > > +
> > > > + /*
> > > > + * Handling below is for UPM self-tests and guests that treat userspace
> > > > + * as the authority on whether a fault should be private or not.
> > > > + */
> > > > + private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
> > >
> > > This code path is sad. One extra slot lookup and xarray look up.
> > > Without mmu lock, the result can change by other vcpu.
> > > Let's find a better way.
> >
> > The intention was to rely on fault->mmu_seq to determine if a
> > KVM_SET_MEMORY_ATTRIBUTES update came in after .private_fault was set so
> > that fault handling could be retried, but that doesn't happen until
> > kvm_faultin_pfn() which is *after* this is logged. So yes, I think there
> > is a race here, and the approach you took in your Misc. series of
> > keeping the kvm_mem_is_private() check inside kvm_faultin_pfn() is more
> > efficient/correct.
> >
> > If we can figure out a way to handle checking the fault flags in a way
> > that works for both TDX/SNP (and KVM self-test use-case) we can
> > consolidate around that.
>
> I can think of the following ways. I think the second option is better because
> we don't need exit bit for error code.
>
> - Introduce software defined error code
> - Add a flags to struct kvm_arch for self-test use-case VM_TYPE_PROTECTED_VM.
> Set it to true for VM_TYPE_PROTECTED_VM case.
> - any other?
Vishal: hoping to get your thoughts here as well from the perspective of
the KVM self-test use-case.
I was thinking that once we set fault->is_private, that sort of
becomes our "software-defined" bit, and what KVM would use from that
point forward to determine whether or not the access should be treated
as a private one or not, and that whatever handler sets
fault->is_private would encapsulate away all the platform-specific
bit-checking needed to do that.
So if we were to straight-forwardly implement that based on how TDX
currently handles checking for the shared bit in GPA, paired with how
SEV-SNP handles checking for private bit in fault flags, it would look
something like:
bool kvm_fault_is_private(kvm, gpa, err)
{
/* SEV-SNP handling */
if (kvm->arch.mmu_private_fault_mask)
return !!(err & arch.mmu_private_fault_mask);
/* TDX handling */
if (kvm->arch.gfn_shared_mask)
return !!(gpa & arch.gfn_shared_mask);
return false;
}
kvm_mmu_do_page_fault(vcpu, gpa, err, ...)
{
struct kvm_page_fault fault = {
...
.is_private = kvm_fault_is_private(vcpu->kvm, gpa, err)
};
...
}
And then arch.mmu_private_fault_mask and arch.gfn_shared_mask would be
set per-KVM-instance, just like they are now with current SNP and TDX
patchsets, since stuff like KVM self-test wouldn't be setting those
masks, so it makes sense to do it per-instance in that regard.
But that still gets a little awkward for the KVM self-test use-case where
.is_private should sort of be ignored in favor of whatever the xarray
reports via kvm_mem_is_private(). In your Misc. series I believe you
handled this by introducing a PFERR_HASATTR_MASK bit so we can determine
whether existing value of fault->is_private should be
ignored/overwritten or not.
So maybe kvm_fault_is_private() needs to return an integer value
instead, like:
enum {
KVM_FAULT_VMM_DEFINED,
KVM_FAULT_SHARED,
KVM_FAULT_PRIVATE,
}
bool kvm_fault_is_private(kvm, gpa, err)
{
/* SEV-SNP handling */
if (kvm->arch.mmu_private_fault_mask)
(err & arch.mmu_private_fault_mask) ? KVM_FAULT_PRIVATE : KVM_FAULT_SHARED
/* TDX handling */
if (kvm->arch.gfn_shared_mask)
(gpa & arch.gfn_shared_mask) ? KVM_FAULT_SHARED : KVM_FAULT_PRIVATE
return KVM_FAULT_VMM_DEFINED;
}
And then down in __kvm_faultin_pfn() we do:
if (fault->is_private == KVM_FAULT_VMM_DEFINED)
fault->is_private = kvm_mem_is_private(vcpu->kvm, fault->gfn);
else if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
return kvm_do_memory_fault_exit(vcpu, fault);
if (fault->is_private)
return kvm_faultin_pfn_private(vcpu, fault);
Maybe kvm_fault_is_private() can be simplified based on what direction
we end up taking WRT ongoing discussions like whether we decide to define
KVM_X86_{SNP,TDX}_VM vm_types in addition to the KVM_X86_PROTECTED_VM
type that the selftests uses, but hoping that for this path, any changes
along that line can be encapsulated away in kvm_fault_is_private() without
any/much further churn at the various call-sites like __kvm_faultin_pfn().
We could even push all the above logic down into the KVM self-tests, but
have:
bool kvm_fault_is_private(kvm, gpa, err) {
return KVM_FAULT_VMM_DEFINED;
}
And that would be enough to run self-tests as standalone series, with
TDX/SNP should filling in kvm_fault_is_private() with their
platform-specific handling.
Does that seem reasonable to you? At least as a starting point.
-Mike
>
> Thanks,
> --
> Isaku Yamahata <[email protected]>
>
> So if we were to straight-forwardly implement that based on how TDX
> currently handles checking for the shared bit in GPA, paired with how
> SEV-SNP handles checking for private bit in fault flags, it would look
> something like:
>
> bool kvm_fault_is_private(kvm, gpa, err)
> {
> /* SEV-SNP handling */
> if (kvm->arch.mmu_private_fault_mask)
> return !!(err & arch.mmu_private_fault_mask);
>
> /* TDX handling */
> if (kvm->arch.gfn_shared_mask)
> return !!(gpa & arch.gfn_shared_mask);
The logic of the two are identical. I think they need to be converged.
Either SEV-SNP should convert the error code private bit to the gfn_shared_mask,
or TDX's shared bit should be converted to some private error bit.
Perhaps converting SEV-SNP makes more sense because if I recall correctly SEV
guest also has a C-bit, correct?
Or, ...
>
> return false;
> }
>
> kvm_mmu_do_page_fault(vcpu, gpa, err, ...)
> {
> struct kvm_page_fault fault = {
> ...
> .is_private = kvm_fault_is_private(vcpu->kvm, gpa, err)
... should we do something like:
.is_private = static_call(kvm_x86_fault_is_private)(vcpu->kvm, gpa,
err);
?
> };
>
> ...
> }
>
> And then arch.mmu_private_fault_mask and arch.gfn_shared_mask would be
> set per-KVM-instance, just like they are now with current SNP and TDX
> patchsets, since stuff like KVM self-test wouldn't be setting those
> masks, so it makes sense to do it per-instance in that regard.
>
> But that still gets a little awkward for the KVM self-test use-case where
> .is_private should sort of be ignored in favor of whatever the xarray
> reports via kvm_mem_is_private().
>
I must have missed something. Why does KVM self-test have impact to how does
KVM handles private fault?
> In your Misc. series I believe you
> handled this by introducing a PFERR_HASATTR_MASK bit so we can determine
> whether existing value of fault->is_private should be
> ignored/overwritten or not.
>
> So maybe kvm_fault_is_private() needs to return an integer value
> instead, like:
>
> enum {
> KVM_FAULT_VMM_DEFINED,
> KVM_FAULT_SHARED,
> KVM_FAULT_PRIVATE,
> }
>
> bool kvm_fault_is_private(kvm, gpa, err)
> {
> /* SEV-SNP handling */
> if (kvm->arch.mmu_private_fault_mask)
> (err & arch.mmu_private_fault_mask) ? KVM_FAULT_PRIVATE : KVM_FAULT_SHARED
>
> /* TDX handling */
> if (kvm->arch.gfn_shared_mask)
> (gpa & arch.gfn_shared_mask) ? KVM_FAULT_SHARED : KVM_FAULT_PRIVATE
>
> return KVM_FAULT_VMM_DEFINED;
> }
>
> And then down in __kvm_faultin_pfn() we do:
>
> if (fault->is_private == KVM_FAULT_VMM_DEFINED)
> fault->is_private = kvm_mem_is_private(vcpu->kvm, fault->gfn);
> else if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
> return kvm_do_memory_fault_exit(vcpu, fault);
>
> if (fault->is_private)
> return kvm_faultin_pfn_private(vcpu, fault);
What does KVM_FAULT_VMM_DEFINED mean, exactly?
Shouldn't the fault type come from _hardware_?