Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751291AbdHXBGy (ORCPT ); Wed, 23 Aug 2017 21:06:54 -0400 Received: from szxga03-in.huawei.com ([45.249.212.189]:10882 "EHLO szxga03-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751157AbdHXBGw (ORCPT ); Wed, 23 Aug 2017 21:06:52 -0400 From: Nixiaoming To: David Hildenbrand , Paul Mackerras CC: "agraf@suse.com" , "pbonzini@redhat.com" , "rkrcmar@redhat.com" , "benh@kernel.crashing.org" , "mpe@ellerman.id.au" , "kvm-ppc@vger.kernel.org" , "kvm@vger.kernel.org" , "linuxppc-dev@lists.ozlabs.org" , "linux-kernel@vger.kernel.org" Subject: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce Thread-Topic: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce Thread-Index: AQHTG+lvEXXP6T/6b0+URT2h8ly0L6KSsWgw Date: Thu, 24 Aug 2017 01:06:27 +0000 Message-ID: References: <20170822142823.69425-1-nixiaoming@huawei.com> <95fe182a-fd21-77a1-33df-0e609c2845fd@redhat.com> <8fd9a878-a8ce-5576-9a5c-1c221ff6ded7@redhat.com> <20170823060624.GA13958@fergus.ozlabs.ibm.com> In-Reply-To: Accept-Language: zh-CN, en-US Content-Language: zh-CN X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.57.88.168] Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A090206.599E2697.0080,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=169.254.1.46, so=2014-11-16 11:51:01, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: 5a6d9ed8c15f7a9b425391b48d61377a Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: 8bit X-MIME-Autoconverted: from base64 to 8bit by nfs id v7O1703k025173 Content-Length: 7174 Lines: 255 >On 23.08.2017 08:06, Paul Mackerras wrote: >> On Wed, Aug 23, 2017 at 01:43:08AM +0000, Nixiaoming wrote: >>>> On 22.08.2017 17:15, David Hildenbrand wrote: >>>>> On 22.08.2017 16:28, nixiaoming wrote: >>>>>> miss kfree(stt) when anon_inode_getfd return fail so add check >>>>>> anon_inode_getfd return val, and kfree stt >>>>>> >>>>>> Signed-off-by: nixiaoming >>>>>> --- >>>>>> arch/powerpc/kvm/book3s_64_vio.c | 5 ++++- >>>>>> 1 file changed, 4 insertions(+), 1 deletion(-) >>>>>> >>>>>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c >>>>>> b/arch/powerpc/kvm/book3s_64_vio.c >>>>>> index a160c14..a0b4459 100644 >>>>>> --- a/arch/powerpc/kvm/book3s_64_vio.c >>>>>> +++ b/arch/powerpc/kvm/book3s_64_vio.c >>>>>> @@ -341,8 +341,11 @@ long kvm_vm_ioctl_create_spapr_tce(struct >>>>>> kvm *kvm, >>>>>> >>>>>> mutex_unlock(&kvm->lock); >>>>>> >>>>>> - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, >>>>>> + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, >>>>>> stt, O_RDWR | O_CLOEXEC); >>>>>> + if (ret < 0) >>>>>> + goto fail; >>>>>> + return ret; >>>>>> >>>>>> fail: >>>>>> if (stt) { >>>>>> >>>>> >>>>> >>>>> stt has already been added to kvm->arch.spapr_tce_tables, so >>>>> freeing it is evil IMHO. I don't know that code, so I don't know >>>>> if there is some other place that will make sure that everything >>>>> in >>>>> kvm->arch.spapr_tce_tables will properly get freed, even when no >>>>> kvm->release >>>>> function has been called (kvm_spapr_tce_release). >>>>> >>>> >>>> If it is really not freed, than also kvm_put_kvm(stt->kvm) is missing. >>>> >>>> -- >>>> >>>> Thanks, >>>> >>>> David >>>> >>> >>> if (!stt) return -ENOMEM; >>> kvm_get_kvm(kvm); >>> if anon_inode_getfd return -ENOMEM >>> The user can not determine whether kvm_get_kvm has been called so >>> need add kvm_pet_kvm when anon_inode_getfd fail >>> >>> stt has already been added to kvm->arch.spapr_tce_tables, but if >>> anon_inode_getfd fail, stt is unused val, so call list_del_rcu, and >>> free as quickly as possible >>> >>> new patch: >>> >>> --- >>> arch/powerpc/kvm/book3s_64_vio.c | 10 +++++++++- >>> 1 file changed, 9 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c >>> b/arch/powerpc/kvm/book3s_64_vio.c >>> index a160c14..e2228f1 100644 >>> --- a/arch/powerpc/kvm/book3s_64_vio.c >>> +++ b/arch/powerpc/kvm/book3s_64_vio.c >>> @@ -341,8 +341,16 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm >>> *kvm, >>> >>> mutex_unlock(&kvm->lock); >>> >>> - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, >>> + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, >>> stt, O_RDWR | O_CLOEXEC); >>> + if (ret < 0) { >>> + mutex_lock(&kvm->lock); >>> + list_del_rcu(&stt->list); > >... don't we have to take care of rcu synchronization before freeing it? > >>> + mutex_unlock(&kvm->lock); >>> + kvm_put_kvm(kvm); >>> + goto fail; >>> + } >>> + return ret; > >of simply > >if (!ret) > return 0; > >mutex_lock(&kvm->lock); >list_del_rcu(&stt->list); >mutex_unlock(&kvm->lock); >kvm_put_kvm(kvm); > > >> >> It seems to me that it would be better to do the anon_inode_getfd() >> call before the kvm_get_kvm() call, and go to the fail label if it >> fails. > >I would have suggested to not add it to the list before it has been >fully created (so nobody can have access to it). But I guess than we >need another level of protection(e.g. kvm->lock). > >Am I missing something, or is kvm_vm_ioctl_create_spapr_tce() racy? > >The -EBUSY check is done without any locking, so two parallel creators >could create an inconsistency, no? Shouldn't this all be protected by >kvm->lock? > >> >> Paul. >> > >Independent of the fix, I'd suggest the following cleanup. > > >From 979f55083ee965e25827a8743e8a9fdb85231a6f Mon Sep 17 00:00:00 2001 >From: David Hildenbrand >Date: Wed, 23 Aug 2017 10:08:58 +0200 >Subject: [PATCH v1 1/1] KVM: PPC: cleanup kvm_vm_ioctl_create_spapr_tce > >Let's simplify error handling. > >Signed-off-by: David Hildenbrand >--- > arch/powerpc/kvm/book3s_64_vio.c | 29 +++++++++++------------------ > 1 file changed, 11 insertions(+), 18 deletions(-) > >diff --git a/arch/powerpc/kvm/book3s_64_vio.c >b/arch/powerpc/kvm/book3s_64_vio.c >index a160c14304eb..6bac49292296 100644 >--- a/arch/powerpc/kvm/book3s_64_vio.c >+++ b/arch/powerpc/kvm/book3s_64_vio.c >@@ -295,8 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, >{ > struct kvmppc_spapr_tce_table *stt = NULL; > unsigned long npages, size; >- int ret = -ENOMEM; >- int i; >+ int i, ret; > > if (!args->size) > return -EINVAL; >@@ -310,16 +309,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); > npages = kvmppc_tce_pages(size); > ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); >- if (ret) { >- stt = NULL; >- goto fail; >- } >+ if (ret) >+ return ret; > >- ret = -ENOMEM; > stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), > GFP_KERNEL); > if (!stt) >- goto fail; >+ return -ENOMEM; > > stt->liobn = args->liobn; > stt->page_shift = args->page_shift; >@@ -331,7 +327,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > for (i = 0; i < npages; i++) { > stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); > if (!stt->pages[i]) >- goto fail; >+ goto fail_free; > } > > kvm_get_kvm(kvm); >@@ -344,15 +340,12 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, > stt, O_RDWR | O_CLOEXEC); > >-fail: >- if (stt) { >- for (i = 0; i < npages; i++) >- if (stt->pages[i]) >- __free_page(stt->pages[i]); >- >- kfree(stt); >- } >- return ret; >+fail_free: >+ for (i = 0; i < npages; i++) >+ if (stt->pages[i]) >+ __free_page(stt->pages[i]); >+ kfree(stt); >+ return -ENOMEM; > } > > static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long >entry) >-- >2.13.5 > > >-- > >Thanks, > >David > Update patch based on advice from David Hildenbrand and Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14..517594a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -334,16 +334,21 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); - mutex_lock(&kvm->lock); list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, stt, O_RDWR | O_CLOEXEC); - + if (ret >= 0) { + kvm_get_kvm(kvm); + return ret; + } + mutex_lock(&kvm->lock); + list_del_rcu(&stt->list); + mutex_unlock(&kvm->lock); + synchronize_rcu(); fail: if (stt) { for (i = 0; i < npages; i++) --