Date: Wed, 13 May 2015 15:58:09 +1000
From: Gavin Shan
To: Alexey Kardashevskiy
Cc: linuxppc-dev@lists.ozlabs.org, David Gibson, Benjamin Herrenschmidt,
	Paul Mackerras, Alex Williamson, Gavin Shan, Wei Yang,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH kernel v10 06/34] vfio: powerpc/spapr: Move page pinning from arch code to VFIO IOMMU driver
Message-ID: <20150513055809.GA3622@gwshan>
References: <1431358763-24371-1-git-send-email-aik@ozlabs.ru>
	<1431358763-24371-7-git-send-email-aik@ozlabs.ru>
In-Reply-To: <1431358763-24371-7-git-send-email-aik@ozlabs.ru>

On Tue, May 12, 2015 at 01:38:55AM +1000, Alexey Kardashevskiy wrote:
>This moves the page pinning (get_user_pages_fast()/put_page()) code out of
>the platform IOMMU code and puts it into the VFIO IOMMU driver, where it
>belongs, as the platform code does not deal with page pinning.
>
>This makes iommu_take_ownership()/iommu_release_ownership() deal with
>the IOMMU table bitmap only.
>
>This removes page unpinning from iommu_take_ownership() as the actual
>TCE table might contain garbage and doing put_page() on it is undefined
>behaviour.
>
>Besides the last part, the rest of the patch is mechanical.
>
>Signed-off-by: Alexey Kardashevskiy
>[aw: for the vfio related changes]
>Acked-by: Alex Williamson
>Reviewed-by: David Gibson

Reviewed-by: Gavin Shan

>---
>Changes:
>v9:
>* added missing tce_iommu_clear call after iommu_release_ownership()
>* brought @offset (a local variable) back to make patch even more
>mechanical
>
>v4:
>* s/iommu_tce_build(tbl, entry + 1/iommu_tce_build(tbl, entry + i/
>---
> arch/powerpc/include/asm/iommu.h    |  4 --
> arch/powerpc/kernel/iommu.c         | 55 -------------------------
> drivers/vfio/vfio_iommu_spapr_tce.c | 80 +++++++++++++++++++++++++++++++------
> 3 files changed, 67 insertions(+), 72 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
>index 8353c86..e94a5e3 100644
>--- a/arch/powerpc/include/asm/iommu.h
>+++ b/arch/powerpc/include/asm/iommu.h
>@@ -194,10 +194,6 @@ extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> 		unsigned long hwaddr, enum dma_data_direction direction);
> extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
> 		unsigned long entry);
>-extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long pages);
>-extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long tce);
>
> extern void iommu_flush_tce(struct iommu_table *tbl);
> extern int iommu_take_ownership(struct iommu_table *tbl);
>diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
>index 2c02d4c..8673c94 100644
>--- a/arch/powerpc/kernel/iommu.c
>+++ b/arch/powerpc/kernel/iommu.c
>@@ -983,30 +983,6 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
> }
> EXPORT_SYMBOL_GPL(iommu_clear_tce);
>
>-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
>-		unsigned long entry, unsigned long pages)
>-{
>-	unsigned long oldtce;
>-	struct page *page;
>-
>-	for ( ; pages; --pages, ++entry) {
>-		oldtce = iommu_clear_tce(tbl, entry);
>-		if (!oldtce)
>-			continue;
>-
>-		page = pfn_to_page(oldtce >> PAGE_SHIFT);
>-		WARN_ON(!page);
>-		if (page) {
>-			if (oldtce & TCE_PCI_WRITE)
>-				SetPageDirty(page);
>-			put_page(page);
>-		}
>-	}
>-
>-	return 0;
>-}
>-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
>-
> /*
>  * hwaddr is a kernel virtual address here (0xc... bazillion),
>  * tce_build converts it to a physical address.
>@@ -1036,35 +1012,6 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> }
> EXPORT_SYMBOL_GPL(iommu_tce_build);
>
>-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
>-		unsigned long tce)
>-{
>-	int ret;
>-	struct page *page = NULL;
>-	unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
>-	enum dma_data_direction direction = iommu_tce_direction(tce);
>-
>-	ret = get_user_pages_fast(tce & PAGE_MASK, 1,
>-			direction != DMA_TO_DEVICE, &page);
>-	if (unlikely(ret != 1)) {
>-		/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
>-				tce, entry << tbl->it_page_shift, ret); */
>-		return -EFAULT;
>-	}
>-	hwaddr = (unsigned long) page_address(page) + offset;
>-
>-	ret = iommu_tce_build(tbl, entry, hwaddr, direction);
>-	if (ret)
>-		put_page(page);
>-
>-	if (ret < 0)
>-		pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
>-				__func__, entry << tbl->it_page_shift, tce, ret);
>-
>-	return ret;
>-}
>-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
>-
> int iommu_take_ownership(struct iommu_table *tbl)
> {
> 	unsigned long sz = (tbl->it_size + 7) >> 3;
>@@ -1078,7 +1025,6 @@ int iommu_take_ownership(struct iommu_table *tbl)
> 	}
>
> 	memset(tbl->it_map, 0xff, sz);
>-	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
>
> 	/*
> 	 * Disable iommu bypass, otherwise the user can DMA to all of
>@@ -1096,7 +1042,6 @@ void iommu_release_ownership(struct iommu_table *tbl)
> {
> 	unsigned long sz = (tbl->it_size + 7) >> 3;
>
>-	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
> 	memset(tbl->it_map, 0, sz);
>
> 	/* Restore bit#0 set by iommu_init_table() */
>diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
>index 730b4ef..b95fa2b 100644
>--- a/drivers/vfio/vfio_iommu_spapr_tce.c
>+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
>@@ -147,6 +147,67 @@ static void tce_iommu_release(void *iommu_data)
> 	kfree(container);
> }
>
>+static int tce_iommu_clear(struct tce_container *container,
>+		struct iommu_table *tbl,
>+		unsigned long entry, unsigned long pages)
>+{
>+	unsigned long oldtce;
>+	struct page *page;
>+
>+	for ( ; pages; --pages, ++entry) {
>+		oldtce = iommu_clear_tce(tbl, entry);

It might be nice to rename iommu_clear_tce() to iommu_tce_free() in a
separate patch, for two reasons as far as I can see: iommu_tce_{build,free}
would then form a pair of functions doing opposite things, and
iommu_tce_free() would be implemented on top of ppc_md.tce_free() just as
iommu_tce_build() depends on ppc_md.tce_build().
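Just to illustrate the naming idea, a rough, untested sketch (the
iommu_tce_free() name is only a suggestion and not part of this patch; the
real change would simply rename iommu_clear_tce() rather than wrap it):

	/*
	 * Suggested name: iommu_tce_free(), the counterpart of
	 * iommu_tce_build(). Shown here as a trivial wrapper around the
	 * existing iommu_clear_tce(), which clears the entry via
	 * ppc_md.tce_free() and returns the old TCE value.
	 */
	unsigned long iommu_tce_free(struct iommu_table *tbl, unsigned long entry)
	{
		return iommu_clear_tce(tbl, entry);
	}
	EXPORT_SYMBOL_GPL(iommu_tce_free);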
>+		if (!oldtce)
>+			continue;
>+
>+		page = pfn_to_page(oldtce >> PAGE_SHIFT);
>+		WARN_ON(!page);
>+		if (page) {
>+			if (oldtce & TCE_PCI_WRITE)
>+				SetPageDirty(page);
>+			put_page(page);
>+		}
>+	}
>+
>+	return 0;
>+}
>+
>+static long tce_iommu_build(struct tce_container *container,
>+		struct iommu_table *tbl,
>+		unsigned long entry, unsigned long tce, unsigned long pages)
>+{
>+	long i, ret = 0;
>+	struct page *page = NULL;
>+	unsigned long hva;
>+	enum dma_data_direction direction = iommu_tce_direction(tce);
>+
>+	for (i = 0; i < pages; ++i) {
>+		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
>+
>+		ret = get_user_pages_fast(tce & PAGE_MASK, 1,
>+				direction != DMA_TO_DEVICE, &page);
>+		if (unlikely(ret != 1)) {
>+			ret = -EFAULT;
>+			break;
>+		}
>+		hva = (unsigned long) page_address(page) + offset;
>+
>+		ret = iommu_tce_build(tbl, entry + i, hva, direction);
>+		if (ret) {
>+			put_page(page);
>+			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
>+					__func__, entry << tbl->it_page_shift,
>+					tce, ret);
>+			break;
>+		}
>+		tce += IOMMU_PAGE_SIZE_4K;
>+	}
>+
>+	if (ret)
>+		tce_iommu_clear(container, tbl, entry, i);
>+
>+	return ret;
>+}
>+
> static long tce_iommu_ioctl(void *iommu_data,
> 				 unsigned int cmd, unsigned long arg)
> {
>@@ -195,7 +256,7 @@ static long tce_iommu_ioctl(void *iommu_data,
> 	case VFIO_IOMMU_MAP_DMA: {
> 		struct vfio_iommu_type1_dma_map param;
> 		struct iommu_table *tbl = container->tbl;
>-		unsigned long tce, i;
>+		unsigned long tce;
>
> 		if (!tbl)
> 			return -ENXIO;
>@@ -229,17 +290,9 @@ static long tce_iommu_ioctl(void *iommu_data,
> 		if (ret)
> 			return ret;
>
>-		for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
>-			ret = iommu_put_tce_user_mode(tbl,
>-					(param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
>-					tce);
>-			if (ret)
>-				break;
>-			tce += IOMMU_PAGE_SIZE_4K;
>-		}
>-		if (ret)
>-			iommu_clear_tces_and_put_pages(tbl,
>-					param.iova >> IOMMU_PAGE_SHIFT_4K, i);
>+		ret = tce_iommu_build(container, tbl,
>+				param.iova >> IOMMU_PAGE_SHIFT_4K,
>+				tce, param.size >> IOMMU_PAGE_SHIFT_4K);
>
> 		iommu_flush_tce(tbl);
>
>@@ -273,7 +326,7 @@ static long tce_iommu_ioctl(void *iommu_data,
> 		if (ret)
> 			return ret;
>
>-		ret = iommu_clear_tces_and_put_pages(tbl,
>+		ret = tce_iommu_clear(container, tbl,
> 				param.iova >> IOMMU_PAGE_SHIFT_4K,
> 				param.size >> IOMMU_PAGE_SHIFT_4K);
> 		iommu_flush_tce(tbl);
>@@ -357,6 +410,7 @@ static void tce_iommu_detach_group(void *iommu_data,
> 		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
> 				iommu_group_id(iommu_group), iommu_group); */
> 		container->tbl = NULL;
>+		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
> 		iommu_release_ownership(tbl);
> 	}
> 	mutex_unlock(&container->lock);

Thanks,
Gavin

>--
>2.4.0.rc3.8.gfb3e7d5
>