Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751618AbdINOAg (ORCPT ); Thu, 14 Sep 2017 10:00:36 -0400 Received: from mx2.suse.de ([195.135.220.15]:38086 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751581AbdINOAf (ORCPT ); Thu, 14 Sep 2017 10:00:35 -0400 Date: Thu, 14 Sep 2017 16:00:21 +0200 From: Borislav Petkov To: Brijesh Singh Cc: linux-kernel@vger.kernel.org, x86@kernel.org, kvm@vger.kernel.org, Thomas Gleixner , Joerg Roedel , "Michael S . Tsirkin" , Paolo Bonzini , =?utf-8?B?XCJSYWRpbSBLcsSNbcOhxZlcIg==?= , Tom Lendacky Subject: Re: [RFC Part2 PATCH v3 22/26] KVM: SVM: Pin guest memory when SEV is active Message-ID: <20170914140021.omgcxnjdcyet3lua@pd.tnic> References: <20170724200303.12197-1-brijesh.singh@amd.com> <20170724200303.12197-23-brijesh.singh@amd.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20170724200303.12197-23-brijesh.singh@amd.com> User-Agent: NeoMutt/20170113 (1.7.2) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7720 Lines: 276 On Mon, Jul 24, 2017 at 03:02:59PM -0500, Brijesh Singh wrote: > The SEV memory encryption engine uses a tweak such that two identical > plaintexts at different location will have a different ciphertexts. plaintexts or plaintext pages? also, s/a // > So swapping or moving ciphertexts of two pages will not result in > plaintexts being swapped. Relocating (or migrating) a physical backing s/a // > pages for SEV guest will require some additional steps. The current SEV "for a SEV guest" > key management spec does not provide commands to swap or migrate (move) > ciphertexts. For now, we pin the guest memory registered through > KVM_MEMORY_ENCRYPT_REGISTER_RAM ioctl. > > Signed-off-by: Brijesh Singh > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/svm.c | 113 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 114 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 150177e..a91aadf 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -747,6 +747,7 @@ struct kvm_sev_info { > unsigned int handle; /* firmware handle */ > unsigned int asid; /* asid for this guest */ > int sev_fd; /* SEV device fd */ > + struct list_head ram_list; /* list of registered ram */ regions_list I guess. > }; > > struct kvm_arch { > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 75dcaa9..cdb1cf3 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -333,8 +333,19 @@ static int sev_asid_new(void); > static void sev_asid_free(int asid); > static void sev_deactivate_handle(struct kvm *kvm, int *error); > static void sev_decommission_handle(struct kvm *kvm, int *error); > +static void sev_unpin_memory(struct page **pages, unsigned long npages); Unneeded. > + > #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) > > +struct kvm_sev_pin_ram { sev_pinned_region > + struct list_head list; > + unsigned long npages; > + struct page **pages; > + struct kvm_memory_encrypt_ram userspace; That member would need a comment what it is. > +}; > + > +static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram); Move code so that you don't need that one. > + > static bool svm_sev_enabled(void) > { > return !!max_sev_asid; > @@ -385,6 +396,11 @@ static inline void sev_set_fd(struct kvm *kvm, int fd) > to_sev_info(kvm)->sev_fd = fd; > } > > +static inline struct list_head *sev_get_ram_list(struct kvm *kvm) > +{ > + return &to_sev_info(kvm)->ram_list; > +} > + > static inline void mark_all_dirty(struct vmcb *vmcb) > { > vmcb->control.clean = 0; > @@ -1566,10 +1582,24 @@ static void sev_firmware_uninit(void) > static void sev_vm_destroy(struct kvm *kvm) > { > int state, error; > + struct list_head *pos, *q; > + struct kvm_sev_pin_ram *ram; > + struct list_head *head = sev_get_ram_list(kvm); Please sort function local variables declaration in a reverse christmas tree order: longest_variable_name; shorter_var_name; even_shorter; i; > > if (!sev_guest(kvm)) > return; > > + /* > + * if userspace was terminated before unregistering the memory region > + * then lets unpin all the registered memory. > + */ > + if (!list_empty(head)) { > + list_for_each_safe(pos, q, head) { > + ram = list_entry(pos, struct kvm_sev_pin_ram, list); > + __mem_encrypt_unregister_ram(ram); You don't need the local "ram" varible here: __mem_encrypt_unregister_ram(list_entry(pos, struct kvm_sev_pin_ram, list)); > + } > + } > + > /* release the firmware resources for this guest */ > if (sev_get_handle(kvm)) { > sev_deactivate_handle(kvm, &error); > @@ -5640,6 +5670,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) > sev_set_active(kvm); > sev_set_asid(kvm, asid); > sev_set_fd(kvm, argp->sev_fd); > + INIT_LIST_HEAD(sev_get_ram_list(kvm)); > ret = 0; > e_err: > fdput(f); > @@ -6437,6 +6468,86 @@ static int svm_memory_encryption_op(struct kvm *kvm, void __user *argp) > return r; > } > > +static int mem_encrypt_register_ram(struct kvm *kvm, > + struct kvm_memory_encrypt_ram *ram) > +{ Please call that arg "regions" or so. "ram" is strange. In the other functions too. > + struct list_head *head = sev_get_ram_list(kvm); > + struct kvm_sev_pin_ram *pin_ram; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + > + pin_ram = kzalloc(sizeof(*pin_ram), GFP_KERNEL); > + if (!pin_ram) > + return -ENOMEM; > + > + pin_ram->pages = sev_pin_memory(ram->address, ram->size, > + &pin_ram->npages, 1); Let it stick out. > + if (!pin_ram->pages) > + goto e_free; > + > + /* > + * Guest may change the memory encryption attribute from C=0 -> C=1 "The guest" > + * for this memory range. Lets make sure caches are flushed to ensure > + * that guest data gets written into memory with correct C-bit. > + */ > + sev_clflush_pages(pin_ram->pages, pin_ram->npages); > + > + pin_ram->userspace.address = ram->address; > + pin_ram->userspace.size = ram->size; > + list_add_tail(&pin_ram->list, head); > + return 0; <---- newline here. > +e_free: > + kfree(pin_ram); > + return 1; > +} > + > +static struct kvm_sev_pin_ram *sev_find_pinned_ram(struct kvm *kvm, > + struct kvm_memory_encrypt_ram *ram) So this function signature is almost impossible to read: you have "kvm" "sev" "pin" "ram" and those long structure names. Now look how something like this: static struct regions_list * sev_find_pinned_memory(struct kvm *kvm, struct enc_range *range) tells you exactly what the function does. > +{ > + struct list_head *head = sev_get_ram_list(kvm); > + struct kvm_sev_pin_ram *i; > + > + list_for_each_entry(i, head, list) { > + if (i->userspace.address == ram->address && > + i->userspace.size == ram->size) if (i->usr.addr == reg->addr && i->usr.size == reg->size) reads much better to me. > + return i; > + } > + > + return NULL; > +} > + > +static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram) > +{ > + /* > + * Guest may have changed the memory encryption attribute from "The guest" > + * C=0 -> C=1. Lets make sure caches are flushed to ensure in data Both comments talk about the 0 -> 1 case for the C-bit. What about the reverse: 1->0? Do we not flush there or we don't have cases where a guest doesn't decrypt its memory? > + * gets written into memory with correct C-bit. > + */ > + sev_clflush_pages(ram->pages, ram->npages); > + > + sev_unpin_memory(ram->pages, ram->npages); > + list_del(&ram->list); > + kfree(ram); > +} > + > +static int mem_encrypt_unregister_ram(struct kvm *kvm, > + struct kvm_memory_encrypt_ram *ram) > +{ > + struct kvm_sev_pin_ram *pinned_ram; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + > + pinned_ram = sev_find_pinned_ram(kvm, ram); > + if (!pinned_ram) > + return -EINVAL; > + > + __mem_encrypt_unregister_ram(pinned_ram); > + > + return 0; > +} > + > static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > .cpu_has_kvm_support = has_svm, > .disabled_by_bios = is_disabled, > @@ -6551,6 +6662,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > .setup_mce = svm_setup_mce, > > .memory_encryption_op = svm_memory_encryption_op, > + .memory_encryption_register_ram = mem_encrypt_register_ram, > + .memory_encryption_unregister_ram = mem_encrypt_unregister_ram, Names are too long. mem_encrypt_reg_memory or so I guess. In general, choose a prefix and stick with it. mem_enc, mem_encrypt, mem_crypt... -- Regards/Gruss, Boris. SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg) --