From: Sean Christopherson <[email protected]>
Add support to MMU caches for initializing a page with a custom 64-bit
value, e.g. to pre-fill an entire page table with non-zero PTE values.
The functionality will be used by x86 to support Intel's TDX, which needs
to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
faults from getting reflected into the guest (Intel's EPT Violation #VE
architecture made the less than brilliant decision of having the per-PTE
behavior be opt-out instead of opt-in).
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <[email protected]>
---
include/linux/kvm_types.h | 1 +
virt/kvm/kvm_main.c | 16 ++++++++++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index d93f6522b2c3..827ecc0b7e10 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
struct kvm_mmu_memory_cache {
gfp_t gfp_zero;
gfp_t gfp_custom;
+ u64 init_value;
struct kmem_cache *kmem_cache;
int capacity;
int nobjs;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c99c9373a3e..c9828feb7a1c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
gfp_t gfp_flags)
{
+ void *page;
+
gfp_flags |= mc->gfp_zero;
if (mc->kmem_cache)
return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
- else
- return (void *)__get_free_page(gfp_flags);
+
+ page = (void *)__get_free_page(gfp_flags);
+ if (page && mc->init_value)
+ memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
+ return page;
}
int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
@@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
if (WARN_ON_ONCE(!capacity))
return -EIO;
+ /*
+ * Custom init values can be used only for page allocations,
+ * and obviously conflict with __GFP_ZERO.
+ */
+ if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
+ return -EIO;
+
mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
if (!mc->objects)
return -ENOMEM;
--
2.39.0
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <[email protected]>
>
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
>
> Signed-off-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <[email protected]>
Reviewed-by: Xiaoyao Li <[email protected]>
> ---
> include/linux/kvm_types.h | 1 +
> virt/kvm/kvm_main.c | 16 ++++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> struct kvm_mmu_memory_cache {
> gfp_t gfp_zero;
> gfp_t gfp_custom;
> + u64 init_value;
> struct kmem_cache *kmem_cache;
> int capacity;
> int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> gfp_t gfp_flags)
> {
> + void *page;
> +
> gfp_flags |= mc->gfp_zero;
>
> if (mc->kmem_cache)
> return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> - else
> - return (void *)__get_free_page(gfp_flags);
> +
> + page = (void *)__get_free_page(gfp_flags);
> + if (page && mc->init_value)
> + memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> + return page;
> }
>
> int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
> if (WARN_ON_ONCE(!capacity))
> return -EIO;
>
> + /*
> + * Custom init values can be used only for page allocations,
> + * and obviously conflict with __GFP_ZERO.
> + */
> + if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> + return -EIO;
> +
> mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
> if (!mc->objects)
> return -ENOMEM;
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <[email protected]>
>
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
>
> Signed-off-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> include/linux/kvm_types.h | 1 +
> virt/kvm/kvm_main.c | 16 ++++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
Reviewed-by: Binbin Wu <[email protected]>
>
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> struct kvm_mmu_memory_cache {
> gfp_t gfp_zero;
> gfp_t gfp_custom;
> + u64 init_value;
> struct kmem_cache *kmem_cache;
> int capacity;
> int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> gfp_t gfp_flags)
> {
> + void *page;
> +
> gfp_flags |= mc->gfp_zero;
>
> if (mc->kmem_cache)
> return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> - else
> - return (void *)__get_free_page(gfp_flags);
> +
> + page = (void *)__get_free_page(gfp_flags);
> + if (page && mc->init_value)
> + memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> + return page;
> }
>
> int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
> if (WARN_ON_ONCE(!capacity))
> return -EIO;
>
> + /*
> + * Custom init values can be used only for page allocations,
> + * and obviously conflict with __GFP_ZERO.
> + */
> + if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> + return -EIO;
> +
> mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
> if (!mc->objects)
> return -ENOMEM;
On 3/5/2024 2:55 PM, Binbin Wu wrote:
>
>
> On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
>> From: Sean Christopherson <[email protected]>
>>
>> Add support to MMU caches for initializing a page with a custom 64-bit
>> value, e.g. to pre-fill an entire page table with non-zero PTE values.
>> The functionality will be used by x86 to support Intel's TDX, which
>> needs
>> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
>> faults from getting reflected into the guest (Intel's EPT Violation #VE
>> architecture made the less than brilliant decision of having the per-PTE
>> behavior be opt-out instead of opt-in).
>>
>> Signed-off-by: Sean Christopherson <[email protected]>
>> Signed-off-by: Isaku Yamahata <[email protected]>
>> Message-Id:
>> <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> include/linux/kvm_types.h | 1 +
>> virt/kvm/kvm_main.c | 16 ++++++++++++++--
>> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> Reviewed-by: Binbin Wu <[email protected]>
>
>>
>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>> index d93f6522b2c3..827ecc0b7e10 100644
>> --- a/include/linux/kvm_types.h
>> +++ b/include/linux/kvm_types.h
>> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>> struct kvm_mmu_memory_cache {
>> gfp_t gfp_zero;
>> gfp_t gfp_custom;
>> + u64 init_value;
>> struct kmem_cache *kmem_cache;
>> int capacity;
>> int nobjs;
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 9c99c9373a3e..c9828feb7a1c 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>> static inline void *mmu_memory_cache_alloc_obj(struct
>> kvm_mmu_memory_cache *mc,
>> gfp_t gfp_flags)
>> {
>> + void *page;
>> +
>> gfp_flags |= mc->gfp_zero;
>> if (mc->kmem_cache)
>> return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
>> - else
>> - return (void *)__get_free_page(gfp_flags);
>> +
>> + page = (void *)__get_free_page(gfp_flags);
>> + if (page && mc->init_value)
>> + memset64(page, mc->init_value, PAGE_SIZE /
>> sizeof(mc->init_value));
Do we need a static_assert() to make sure mc->init_value is 64bit?
>> + return page;
>> }
>> int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc,
>> int capacity, int min)
>> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct
>> kvm_mmu_memory_cache *mc, int capacity,
>> if (WARN_ON_ONCE(!capacity))
>> return -EIO;
>> + /*
>> + * Custom init values can be used only for page allocations,
>> + * and obviously conflict with __GFP_ZERO.
>> + */
>> + if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache ||
>> mc->gfp_zero)))
>> + return -EIO;
>> +
>> mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>> if (!mc->objects)
>> return -ENOMEM;
>
>
On Tue, Mar 26, 2024 at 11:56:35PM +0800,
Binbin Wu <[email protected]> wrote:
> On 3/5/2024 2:55 PM, Binbin Wu wrote:
> >
> >
> > On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> > > From: Sean Christopherson <[email protected]>
> > >
> > > Add support to MMU caches for initializing a page with a custom 64-bit
> > > value, e.g. to pre-fill an entire page table with non-zero PTE values.
> > > The functionality will be used by x86 to support Intel's TDX, which
> > > needs
> > > to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> > > faults from getting reflected into the guest (Intel's EPT Violation #VE
> > > architecture made the less than brilliant decision of having the per-PTE
> > > behavior be opt-out instead of opt-in).
> > >
> > > Signed-off-by: Sean Christopherson <[email protected]>
> > > Signed-off-by: Isaku Yamahata <[email protected]>
> > > Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> > > Signed-off-by: Paolo Bonzini <[email protected]>
> > > ---
> > > include/linux/kvm_types.h | 1 +
> > > virt/kvm/kvm_main.c | 16 ++++++++++++++--
> > > 2 files changed, 15 insertions(+), 2 deletions(-)
> >
> > Reviewed-by: Binbin Wu <[email protected]>
> >
> > >
> > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > index d93f6522b2c3..827ecc0b7e10 100644
> > > --- a/include/linux/kvm_types.h
> > > +++ b/include/linux/kvm_types.h
> > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > > struct kvm_mmu_memory_cache {
> > > gfp_t gfp_zero;
> > > gfp_t gfp_custom;
> > > + u64 init_value;
> > > struct kmem_cache *kmem_cache;
> > > int capacity;
> > > int nobjs;
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > static inline void *mmu_memory_cache_alloc_obj(struct
> > > kvm_mmu_memory_cache *mc,
> > > gfp_t gfp_flags)
> > > {
> > > + void *page;
> > > +
> > > gfp_flags |= mc->gfp_zero;
> > > if (mc->kmem_cache)
> > > return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > - else
> > > - return (void *)__get_free_page(gfp_flags);
> > > +
> > > + page = (void *)__get_free_page(gfp_flags);
> > > + if (page && mc->init_value)
> > > + memset64(page, mc->init_value, PAGE_SIZE /
> > > sizeof(mc->init_value));
>
> Do we need a static_assert() to make sure mc->init_value is 64bit?
That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
uniformly.
--
Isaku Yamahata <[email protected]>
On Mon, May 13, 2024 at 01:38:39PM -0700,
Isaku Yamahata <[email protected]> wrote:
> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > > static inline void *mmu_memory_cache_alloc_obj(struct
> > > > kvm_mmu_memory_cache *mc,
> > > > gfp_t gfp_flags)
> > > > {
> > > > + void *page;
> > > > +
> > > > gfp_flags |= mc->gfp_zero;
> > > > if (mc->kmem_cache)
> > > > return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > - else
> > > > - return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > + page = (void *)__get_free_page(gfp_flags);
> > > > + if (page && mc->init_value)
> > > > + memset64(page, mc->init_value, PAGE_SIZE /
> > > > sizeof(mc->init_value));
> >
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
>
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.
Oops, I picked the wrong mail to reply. Sorry for noise.
--
Isaku Yamahata <[email protected]>
On Mon, May 13, 2024, Isaku Yamahata wrote:
> On Tue, Mar 26, 2024 at 11:56:35PM +0800, Binbin Wu <[email protected]> wrote:
> > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > > index d93f6522b2c3..827ecc0b7e10 100644
> > > > --- a/include/linux/kvm_types.h
> > > > +++ b/include/linux/kvm_types.h
> > > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > > > struct kvm_mmu_memory_cache {
> > > > gfp_t gfp_zero;
> > > > gfp_t gfp_custom;
> > > > + u64 init_value;
> > > > struct kmem_cache *kmem_cache;
> > > > int capacity;
> > > > int nobjs;
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > > static inline void *mmu_memory_cache_alloc_obj(struct
> > > > kvm_mmu_memory_cache *mc,
> > > > gfp_t gfp_flags)
> > > > {
> > > > + void *page;
> > > > +
> > > > gfp_flags |= mc->gfp_zero;
> > > > if (mc->kmem_cache)
> > > > return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > - else
> > > > - return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > + page = (void *)__get_free_page(gfp_flags);
> > > > + if (page && mc->init_value)
> > > > + memset64(page, mc->init_value, PAGE_SIZE /
> > > > sizeof(mc->init_value));
> >
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
>
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.
I'm pretty sure Binbin is talking about passing init_value to memset64(), not
about whether or not that suffices for EPT. So I wouldn't say it's overkill.
However, I don't think a static assert is warranted. Functionally, tracking
init_value as a u32 or even a u8 would be a-ok as it's a copy-by-value parameter
that won't be sign-extended or truncated. I.e. the real reqiurement comes from
TDX wanting to set a 64-bit value.
And trying to set bit 63 in a 32-bit field _will_ make the compiler unhappy:
arch/x86/kvm/mmu/mmu.c: In function ‘kvm_mmu_create’:
include/vdso/bits.h:8:33: error: conversion from ‘long long unsigned int’ to ‘u32’ {aka ‘unsigned int’} changes value from ‘9223372036854775808’ to ‘0’ [-Werror=overflow]
8 | #define BIT_ULL(nr) (ULL(1) << (nr))
| ^
arch/x86/kvm/mmu/spte.h:162:33: note: in expansion of macro ‘BIT_ULL’
162 | #define SHADOW_NONPRESENT_VALUE BIT_ULL(63)
| ^~~~~~~
arch/x86/kvm/mmu/mmu.c:6225:17: note: in expansion of macro ‘SHADOW_NONPRESENT_VALUE’
6225 | SHADOW_NONPRESENT_VALUE;
| ^~~~~~~~~~~~~~~~~~~~~~~
I suppose one could argue that changing init_value to a u128 could result in
undetected truncation, but IMO that firmly crosses into ridiculous territory.