2022-03-08 09:12:32

by Nikunj A. Dadhania

Subject: [PATCH RFC v1 9/9] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

From: Sean Christopherson <[email protected]>

Pin the memory for the data being passed to launch_update_data()
because it gets encrypted before the guest is first run and must
not be moved, which would corrupt it.

Signed-off-by: Sean Christopherson <[email protected]>
[ * Use kvm_for_each_memslot_in_hva_range() to find slot and iterate
* Updated sev_pin_memory_in_mmu() error handling.
* As pinning/unpinning pages is handled within the MMU, removed
{get,put}_user(). ]
Signed-off-by: Nikunj A Dadhania <[email protected]>
---
arch/x86/kvm/svm/sev.c | 146 +++++++++++++++++++++++++++++++++++++----
1 file changed, 134 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7e39320fc65d..1c371268934b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -22,6 +22,7 @@
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>

+#include "mmu.h"
#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
@@ -428,9 +429,93 @@ static void *sev_alloc_pages(struct kvm_sev_info *sev, unsigned long uaddr,
return pages;
}

+#define SEV_PFERR_RO (PFERR_USER_MASK)
+#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
+
+static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
+ unsigned long size,
+ unsigned long *npages)
+{
+ unsigned long hva_start, hva_end, uaddr, end, slot_start, slot_end;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct interval_tree_node *node;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ int idx, ret = 0, i = 0;
+ struct kvm_vcpu *vcpu;
+ struct page **pages;
+ kvm_pfn_t pfn;
+ u32 err_code;
+ gfn_t gfn;
+
+ pages = sev_alloc_pages(sev, addr, size, npages);
+ if (IS_ERR(pages))
+ return pages;
+
+ vcpu = kvm_get_vcpu(kvm, 0);
+ if (mutex_lock_killable(&vcpu->mutex)) {
+ kvfree(pages);
+ return ERR_PTR(-EINTR);
+ }
+
+ vcpu_load(vcpu);
+ idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_mmu_load(vcpu);
+
+ end = addr + (*npages << PAGE_SHIFT);
+ slots = kvm_memslots(kvm);
+
+ kvm_for_each_memslot_in_hva_range(node, slots, addr, end) {
+ slot = container_of(node, struct kvm_memory_slot,
+ hva_node[slots->node_idx]);
+ slot_start = slot->userspace_addr;
+ slot_end = slot_start + (slot->npages << PAGE_SHIFT);
+ hva_start = max(addr, slot_start);
+ hva_end = min(end, slot_end);
+
+ err_code = (slot->flags & KVM_MEM_READONLY) ?
+ SEV_PFERR_RO : SEV_PFERR_RW;
+
+ for (uaddr = hva_start; uaddr < hva_end; uaddr += PAGE_SIZE) {
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched())
+ cond_resched();
+
+ /*
+ * Fault in the page and sev_pin_page() will handle the
+ * pinning
+ */
+ gfn = hva_to_gfn_memslot(uaddr, slot);
+ pfn = kvm_mmu_map_tdp_page(vcpu, gfn_to_gpa(gfn),
+ err_code, PG_LEVEL_4K);
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ break;
+ }
+ pages[i++] = pfn_to_page(pfn);
+ }
+ }
+
+ kvm_mmu_unload(vcpu);
+ srcu_read_unlock(&kvm->srcu, idx);
+ vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
+
+ if (!ret)
+ return pages;
+
+ kvfree(pages);
+ return ERR_PTR(ret);
+}
+
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
- int write)
+ int write, bool mmu_usable)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct pinned_region *region;
@@ -441,6 +526,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,

lockdep_assert_held(&kvm->lock);

+ /* Use MMU based pinning if possible. */
+ if (mmu_usable)
+ return sev_pin_memory_in_mmu(kvm, uaddr, ulen, n);
+
pages = sev_alloc_pages(sev, uaddr, ulen, &npages);
if (IS_ERR(pages))
return pages;
@@ -558,6 +647,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data data;
struct page **inpages;
@@ -574,15 +664,18 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
vaddr_end = vaddr + size;

/* Lock the user memory. */
- inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1, mmu_usable);
if (IS_ERR(inpages))
return PTR_ERR(inpages);

/*
* Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
* place; the cache may contain the data that was written unencrypted.
+ * Flushing is automatically handled if the pages can be pinned in the
+ * MMU.
*/
- sev_clflush_pages(inpages, npages);
+ if (!mmu_usable)
+ sev_clflush_pages(inpages, npages);

data.reserved = 0;
data.handle = sev->handle;
@@ -617,9 +710,14 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
set_page_dirty_lock(inpages[i]);
mark_page_accessed(inpages[i]);
}
- /* unlock the user pages on error */
+ /*
+ * Unlock the user pages on error; otherwise the pages will be
+ * unpinned during the memslot free or VM destroy path.
+ */
if (ret)
sev_unpin_memory(kvm, inpages, npages);
+ else if (mmu_usable)
+ kvfree(inpages);
return ret;
}

@@ -1001,11 +1099,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
int len, s_off, d_off;

/* lock userspace source and destination page */
- src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0, false);
if (IS_ERR(src_p))
return PTR_ERR(src_p);

- dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1, false);
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return PTR_ERR(dst_p);
@@ -1057,6 +1155,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret data;
struct kvm_sev_launch_secret params;
@@ -1071,15 +1170,18 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
return -EFAULT;

- pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1, mmu_usable);
if (IS_ERR(pages))
return PTR_ERR(pages);

/*
* Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
* place; the cache may contain the data that was written unencrypted.
+ * Flushing is automatically handled if the pages can be pinned in the
+ * MMU.
*/
- sev_clflush_pages(pages, n);
+ if (!mmu_usable)
+ sev_clflush_pages(pages, n);

/*
* The secret must be copied into contiguous memory region, lets verify
@@ -1126,8 +1228,15 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
set_page_dirty_lock(pages[i]);
mark_page_accessed(pages[i]);
}
+ /*
+ * Unlock the user pages on error; otherwise the pages will be
+ * unpinned during the memslot free or VM destroy path.
+ */
if (ret)
sev_unpin_memory(kvm, pages, n);
+ else if (mmu_usable)
+ kvfree(pages);
+
return ret;
}

@@ -1358,7 +1467,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)

/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 0);
+ PAGE_SIZE, &n, 0, false);
if (IS_ERR(guest_page))
return PTR_ERR(guest_page);

@@ -1406,6 +1515,10 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
e_free_hdr:
kfree(hdr);
e_unpin:
+ /*
+ * Unlock the user pages on error; otherwise the pages will be
+ * unpinned during the memslot free or VM destroy path.
+ */
if (ret)
sev_unpin_memory(kvm, guest_page, n);

@@ -1512,6 +1625,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)

static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_receive_update_data params;
struct sev_data_receive_update_data data;
@@ -1555,7 +1669,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)

/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 1);
+ PAGE_SIZE, &n, 1, mmu_usable);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
@@ -1564,9 +1678,11 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/*
* Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
* encrypts the written data with the guest's key, and the cache may
- * contain dirty, unencrypted data.
+ * contain dirty, unencrypted data. Flushing is automatically handled if
+ * the pages can be pinned in the MMU.
*/
- sev_clflush_pages(guest_page, n);
+ if (!mmu_usable)
+ sev_clflush_pages(guest_page, n);

/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
@@ -1577,8 +1693,14 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
&argp->error);

+ /*
+ * Unlock the user pages on error; otherwise the pages will be
+ * unpinned during the memslot free or VM destroy path.
+ */
if (ret)
sev_unpin_memory(kvm, guest_page, n);
+ else if (mmu_usable)
+ kvfree(guest_page);

e_free_trans:
kfree(trans);
--
2.32.0
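
For context on the mmu_usable checks above: the MMU-based pinning path is only taken
when the VM already has at least one vCPU (kvm->online_vcpus > 0), so userspace needs
to create its vCPUs before issuing KVM_SEV_LAUNCH_UPDATE_DATA for that path to kick in.
A minimal, hypothetical userspace sketch of that ordering follows; it uses the SEV
structures from the KVM uAPI, and SEV_LAUNCH_START setup, memslot registration and
error handling are omitted:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical sketch, not part of this series: create a vCPU first so that
 * sev_launch_update_data() sees online_vcpus > 0 and pins through the MMU.
 */
static int sev_launch_update(int vm_fd, int sev_fd, void *buf, __u32 len)
{
	struct kvm_sev_launch_update_data update = {
		.uaddr = (__u64)(unsigned long)buf,
		.len   = len,
	};
	struct kvm_sev_cmd cmd = {
		.id     = KVM_SEV_LAUNCH_UPDATE_DATA,
		.data   = (__u64)(unsigned long)&update,
		.sev_fd = sev_fd,
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}

/*
 * Ordering that matters with this patch applied:
 *
 *	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	(vCPU exists)
 *	sev_launch_update(vm_fd, sev_fd, buf, len);	(MMU pinning path is used)
 */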


2022-03-09 22:34:20

by Maciej S. Szmigiero

Subject: Re: [PATCH RFC v1 9/9] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

On 8.03.2022 05:38, Nikunj A Dadhania wrote:
> From: Sean Christopherson <[email protected]>
>
> Pin the memory for the data being passed to launch_update_data()
> because it gets encrypted before the guest is first run and must
> not be moved, which would corrupt it.
>
> Signed-off-by: Sean Christopherson <[email protected]>
> [ * Use kvm_for_each_memslot_in_hva_range() to find slot and iterate
> * Updated sev_pin_memory_in_mmu() error handling.
> * As pinning/unpinning pages is handled within the MMU, removed
> {get,put}_user(). ]
> Signed-off-by: Nikunj A Dadhania <[email protected]>
> ---
> arch/x86/kvm/svm/sev.c | 146 +++++++++++++++++++++++++++++++++++++----
> 1 file changed, 134 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 7e39320fc65d..1c371268934b 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -22,6 +22,7 @@
> #include <asm/trapnr.h>
> #include <asm/fpu/xcr.h>
>
> +#include "mmu.h"
> #include "x86.h"
> #include "svm.h"
> #include "svm_ops.h"
> @@ -428,9 +429,93 @@ static void *sev_alloc_pages(struct kvm_sev_info *sev, unsigned long uaddr,
> return pages;
> }
>
> +#define SEV_PFERR_RO (PFERR_USER_MASK)
> +#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
> +
> +static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
> + unsigned long size,
> + unsigned long *npages)
> +{
> + unsigned long hva_start, hva_end, uaddr, end, slot_start, slot_end;
> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> + struct interval_tree_node *node;
> + struct kvm_memory_slot *slot;
> + struct kvm_memslots *slots;
> + int idx, ret = 0, i = 0;
> + struct kvm_vcpu *vcpu;
> + struct page **pages;
> + kvm_pfn_t pfn;
> + u32 err_code;
> + gfn_t gfn;
> +
> + pages = sev_alloc_pages(sev, addr, size, npages);
> + if (IS_ERR(pages))
> + return pages;
> +
> + vcpu = kvm_get_vcpu(kvm, 0);
> + if (mutex_lock_killable(&vcpu->mutex)) {
> + kvfree(pages);
> + return ERR_PTR(-EINTR);
> + }
> +
> + vcpu_load(vcpu);
> + idx = srcu_read_lock(&kvm->srcu);
> +
> + kvm_mmu_load(vcpu);
> +
> + end = addr + (*npages << PAGE_SHIFT);
> + slots = kvm_memslots(kvm);
> +
> + kvm_for_each_memslot_in_hva_range(node, slots, addr, end) {
> + slot = container_of(node, struct kvm_memory_slot,
> + hva_node[slots->node_idx]);
> + slot_start = slot->userspace_addr;
> + slot_end = slot_start + (slot->npages << PAGE_SHIFT);
> + hva_start = max(addr, slot_start);
> + hva_end = min(end, slot_end);
> +
> + err_code = (slot->flags & KVM_MEM_READONLY) ?
> + SEV_PFERR_RO : SEV_PFERR_RW;
> +
> + for (uaddr = hva_start; uaddr < hva_end; uaddr += PAGE_SIZE) {
> + if (signal_pending(current)) {
> + ret = -ERESTARTSYS;
> + break;
> + }
> +
> + if (need_resched())
> + cond_resched();
> +
> + /*
> + * Fault in the page and sev_pin_page() will handle the
> + * pinning
> + */
> + gfn = hva_to_gfn_memslot(uaddr, slot);
> + pfn = kvm_mmu_map_tdp_page(vcpu, gfn_to_gpa(gfn),
> + err_code, PG_LEVEL_4K);
> + if (is_error_noslot_pfn(pfn)) {
> + ret = -EFAULT;
> + break;
> + }
> + pages[i++] = pfn_to_page(pfn);
> + }
> + }

This algorithm looks much better than the previous one - thanks!

By the way, as far as I know, there could be duplicates in the "pages" array
above since the same hva can be mapped to multiple gfns (in different memslots).
Is the code prepared to deal with this possibility?

Thanks,
Maciej
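
For illustration, the aliasing described above can be produced by registering two
memslots that share a userspace_addr range but use different guest_phys_addr values; a
hypothetical sketch (slot numbers, addresses and sizes are made up) using the standard
KVM_SET_USER_MEMORY_REGION ioctl:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical sketch: the same HVA range backs two different GPA ranges, so
 * sev_pin_memory_in_mmu() visits both slots for that HVA range and stores the
 * same struct page pointers more than once in its "pages" array.
 */
static void alias_one_hva_range(int vm_fd, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.guest_phys_addr = 0x100000000ULL,	/* arbitrary GPA */
		.memory_size     = 0x200000,		/* 2 MiB */
		.userspace_addr  = (__u64)(unsigned long)hva,
	};

	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);

	region.slot            = 1;
	region.guest_phys_addr = 0x140000000ULL;	/* same HVA, different GPA */
	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}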

2022-03-10 01:33:52

by Nikunj A. Dadhania

Subject: Re: [PATCH RFC v1 9/9] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

On 3/9/2022 10:27 PM, Maciej S. Szmigiero wrote:
> On 8.03.2022 05:38, Nikunj A Dadhania wrote:
>> From: Sean Christopherson <[email protected]>
>>
>> Pin the memory for the data being passed to launch_update_data()
>> because it gets encrypted before the guest is first run and must
>> not be moved, which would corrupt it.
>>
>> Signed-off-by: Sean Christopherson <[email protected]>
>> [ * Use kvm_for_each_memslot_in_hva_range() to find slot and iterate
>>    * Updated sev_pin_memory_in_mmu() error handling.
>>    * As pinning/unpinning pages is handled within the MMU, removed
>>      {get,put}_user(). ]
>> Signed-off-by: Nikunj A Dadhania <[email protected]>
>> ---
>>   arch/x86/kvm/svm/sev.c | 146 +++++++++++++++++++++++++++++++++++++----
>>   1 file changed, 134 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>> index 7e39320fc65d..1c371268934b 100644
>> --- a/arch/x86/kvm/svm/sev.c
>> +++ b/arch/x86/kvm/svm/sev.c
>> @@ -22,6 +22,7 @@
>>   #include <asm/trapnr.h>
>>   #include <asm/fpu/xcr.h>
>>   +#include "mmu.h"
>>   #include "x86.h"
>>   #include "svm.h"
>>   #include "svm_ops.h"
>> @@ -428,9 +429,93 @@ static void *sev_alloc_pages(struct kvm_sev_info *sev, unsigned long uaddr,
>>       return pages;
>>   }
>>   +#define SEV_PFERR_RO (PFERR_USER_MASK)
>> +#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
>> +
>> +static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
>> +                       unsigned long size,
>> +                       unsigned long *npages)
>> +{
>> +    unsigned long hva_start, hva_end, uaddr, end, slot_start, slot_end;
>> +    struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>> +    struct interval_tree_node *node;
>> +    struct kvm_memory_slot *slot;
>> +    struct kvm_memslots *slots;
>> +    int idx, ret = 0, i = 0;
>> +    struct kvm_vcpu *vcpu;
>> +    struct page **pages;
>> +    kvm_pfn_t pfn;
>> +    u32 err_code;
>> +    gfn_t gfn;
>> +
>> +    pages = sev_alloc_pages(sev, addr, size, npages);
>> +    if (IS_ERR(pages))
>> +        return pages;
>> +
>> +    vcpu = kvm_get_vcpu(kvm, 0);
>> +    if (mutex_lock_killable(&vcpu->mutex)) {
>> +        kvfree(pages);
>> +        return ERR_PTR(-EINTR);
>> +    }
>> +
>> +    vcpu_load(vcpu);
>> +    idx = srcu_read_lock(&kvm->srcu);
>> +
>> +    kvm_mmu_load(vcpu);
>> +
>> +    end = addr + (*npages << PAGE_SHIFT);
>> +    slots = kvm_memslots(kvm);
>> +
>> +    kvm_for_each_memslot_in_hva_range(node, slots, addr, end) {
>> +        slot = container_of(node, struct kvm_memory_slot,
>> +                    hva_node[slots->node_idx]);
>> +        slot_start = slot->userspace_addr;
>> +        slot_end = slot_start + (slot->npages << PAGE_SHIFT);
>> +        hva_start = max(addr, slot_start);
>> +        hva_end = min(end, slot_end);
>> +
>> +        err_code = (slot->flags & KVM_MEM_READONLY) ?
>> +            SEV_PFERR_RO : SEV_PFERR_RW;
>> +
>> +        for (uaddr = hva_start; uaddr < hva_end; uaddr += PAGE_SIZE) {
>> +            if (signal_pending(current)) {
>> +                ret = -ERESTARTSYS;
>> +                break;
>> +            }
>> +
>> +            if (need_resched())
>> +                cond_resched();
>> +
>> +            /*
>> +             * Fault in the page and sev_pin_page() will handle the
>> +             * pinning
>> +             */
>> +            gfn = hva_to_gfn_memslot(uaddr, slot);
>> +            pfn = kvm_mmu_map_tdp_page(vcpu, gfn_to_gpa(gfn),
>> +                           err_code, PG_LEVEL_4K);
>> +            if (is_error_noslot_pfn(pfn)) {
>> +                ret = -EFAULT;
>> +                break;
>> +            }
>> +            pages[i++] = pfn_to_page(pfn);
>> +        }
>> +    }
>
> This algorithm looks much better than the previous one - thanks!

Thanks for your feedback earlier.

> By the way, as far as I know, there could be duplicates in the "pages" array
> above since the same hva can be mapped to multiple gfns (in different memslots).
> Is the code prepared to deal with this possibility?

Yes, as the pinning is tracked with the pfn as the index, a page can get pinned multiple
times. The pages would get unpinned during the memslot destroy path.
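
A minimal sketch of that idea, with purely illustrative names rather than the actual
helpers from the earlier patches in this series: each pin takes its own page reference,
so a pfn that shows up more than once simply holds that many references, and each unpin
(memslot free or VM destroy) drops one of them.

#include <linux/kvm_host.h>
#include <linux/mm.h>

/* Illustrative only; the real helpers live in earlier patches of this series.
 * One reference per pin makes duplicate pfns (the same HVA mapped by several
 * memslots) harmless: references are taken and dropped per mapping.
 */
static void sketch_pin_pfn(kvm_pfn_t pfn)
{
	get_page(pfn_to_page(pfn));		/* one reference per pin */
}

static void sketch_unpin_pfn(kvm_pfn_t pfn)
{
	put_page(pfn_to_page(pfn));		/* dropped on memslot free / VM destroy */
}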

Regards
Nikunj