Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753594Ab3IYRDH (ORCPT ); Wed, 25 Sep 2013 13:03:07 -0400 Received: from mx1.redhat.com ([209.132.183.28]:19908 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751477Ab3IYRDD (ORCPT ); Wed, 25 Sep 2013 13:03:03 -0400 Message-ID: <1380128566.3030.327.camel@ul30vt.home> Subject: Re: [PATCH 6/7] vfio: moving some functions in common file From: Alex Williamson To: Bharat Bhushan Cc: joro@8bytes.org, benh@kernel.crashing.org, galak@kernel.crashing.org, linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-pci@vger.kernel.org, agraf@suse.de, scottwood@freescale.com, iommu@lists.linux-foundation.org, Bharat Bhushan Date: Wed, 25 Sep 2013 11:02:46 -0600 In-Reply-To: <1379575763-2091-7-git-send-email-Bharat.Bhushan@freescale.com> References: <1379575763-2091-1-git-send-email-Bharat.Bhushan@freescale.com> <1379575763-2091-7-git-send-email-Bharat.Bhushan@freescale.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 16231 Lines: 558 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: > Some function defined in vfio_iommu_type1.c were common and > we want to use these for FSL IOMMU (PAMU) and iommu-none driver. > So some of them are moved to vfio_iommu_common.c > > I think we can do more of that but we will take this step by step. > > Signed-off-by: Bharat Bhushan > --- > drivers/vfio/Makefile | 4 +- > drivers/vfio/vfio_iommu_common.c | 235 ++++++++++++++++++++++++++++++++++++++ > drivers/vfio/vfio_iommu_common.h | 30 +++++ > drivers/vfio/vfio_iommu_type1.c | 206 +--------------------------------- > 4 files changed, 268 insertions(+), 207 deletions(-) > create mode 100644 drivers/vfio/vfio_iommu_common.c > create mode 100644 drivers/vfio/vfio_iommu_common.h > > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile > index 72bfabc..c5792ec 100644 > --- a/drivers/vfio/Makefile > +++ b/drivers/vfio/Makefile > @@ -1,4 +1,4 @@ > obj-$(CONFIG_VFIO) += vfio.o > -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o > -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o > +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o > +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o > obj-$(CONFIG_VFIO_PCI) += pci/ > diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c > new file mode 100644 > index 0000000..8bdc0ea > --- /dev/null > +++ b/drivers/vfio/vfio_iommu_common.c > @@ -0,0 +1,235 @@ > +/* > + * VFIO: Common code for vfio IOMMU support > + * > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Author: Alex Williamson > + * Author: Bharat Bhushan > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * Derived from original vfio: > + * Copyright 2010 Cisco Systems, Inc. All rights reserved. > + * Author: Tom Lyon, pugs@cisco.com > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include /* pci_bus_type */ > +#include > +#include > +#include > +#include > +#include > +#include Please cleanup includes on both the source and target files. You obviously don't need linux/pci.h here for one. > + > +static bool disable_hugepages; > +module_param_named(disable_hugepages, > + disable_hugepages, bool, S_IRUGO | S_IWUSR); > +MODULE_PARM_DESC(disable_hugepages, > + "Disable VFIO IOMMU support for IOMMU hugepages."); > + > +struct vwork { > + struct mm_struct *mm; > + long npage; > + struct work_struct work; > +}; > + > +/* delayed decrement/increment for locked_vm */ > +void vfio_lock_acct_bg(struct work_struct *work) > +{ > + struct vwork *vwork = container_of(work, struct vwork, work); > + struct mm_struct *mm; > + > + mm = vwork->mm; > + down_write(&mm->mmap_sem); > + mm->locked_vm += vwork->npage; > + up_write(&mm->mmap_sem); > + mmput(mm); > + kfree(vwork); > +} > + > +void vfio_lock_acct(long npage) > +{ > + struct vwork *vwork; > + struct mm_struct *mm; > + > + if (!current->mm || !npage) > + return; /* process exited or nothing to do */ > + > + if (down_write_trylock(¤t->mm->mmap_sem)) { > + current->mm->locked_vm += npage; > + up_write(¤t->mm->mmap_sem); > + return; > + } > + > + /* > + * Couldn't get mmap_sem lock, so must setup to update > + * mm->locked_vm later. If locked_vm were atomic, we > + * wouldn't need this silliness > + */ > + vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > + if (!vwork) > + return; > + mm = get_task_mm(current); > + if (!mm) { > + kfree(vwork); > + return; > + } > + INIT_WORK(&vwork->work, vfio_lock_acct_bg); > + vwork->mm = mm; > + vwork->npage = npage; > + schedule_work(&vwork->work); > +} > + > +/* > + * Some mappings aren't backed by a struct page, for example an mmap'd > + * MMIO range for our own or another device. These use a different > + * pfn conversion and shouldn't be tracked as locked pages. > + */ > +bool is_invalid_reserved_pfn(unsigned long pfn) > +{ > + if (pfn_valid(pfn)) { > + bool reserved; > + struct page *tail = pfn_to_page(pfn); > + struct page *head = compound_trans_head(tail); > + reserved = !!(PageReserved(head)); > + if (head != tail) { > + /* > + * "head" is not a dangling pointer > + * (compound_trans_head takes care of that) > + * but the hugepage may have been split > + * from under us (and we may not hold a > + * reference count on the head page so it can > + * be reused before we run PageReferenced), so > + * we've to check PageTail before returning > + * what we just read. > + */ > + smp_rmb(); > + if (PageTail(tail)) > + return reserved; > + } > + return PageReserved(tail); > + } > + > + return true; > +} > + > +int put_pfn(unsigned long pfn, int prot) > +{ > + if (!is_invalid_reserved_pfn(pfn)) { > + struct page *page = pfn_to_page(pfn); > + if (prot & IOMMU_WRITE) > + SetPageDirty(page); > + put_page(page); > + return 1; > + } > + return 0; > +} > + > +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) > +{ > + struct page *page[1]; > + struct vm_area_struct *vma; > + int ret = -EFAULT; > + > + if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > + *pfn = page_to_pfn(page[0]); > + return 0; > + } > + > + printk("via vma\n"); > + down_read(¤t->mm->mmap_sem); > + > + vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > + > + if (vma && vma->vm_flags & VM_PFNMAP) { > + *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > + if (is_invalid_reserved_pfn(*pfn)) > + ret = 0; > + } > + > + up_read(¤t->mm->mmap_sem); > + > + return ret; > +} > + > +/* > + * Attempt to pin pages. We really don't want to track all the pfns and > + * the iommu can only map chunks of consecutive pfns anyway, so get the > + * first page and all consecutive pages with the same locking. > + */ > +long vfio_pin_pages(unsigned long vaddr, long npage, > + int prot, unsigned long *pfn_base) > +{ > + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > + bool lock_cap = capable(CAP_IPC_LOCK); > + long ret, i; > + > + if (!current->mm) > + return -ENODEV; > + > + ret = vaddr_get_pfn(vaddr, prot, pfn_base); > + if (ret) > + return ret; > + > + if (is_invalid_reserved_pfn(*pfn_base)) > + return 1; > + > + if (!lock_cap && current->mm->locked_vm + 1 > limit) { > + put_pfn(*pfn_base, prot); > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > + limit << PAGE_SHIFT); > + return -ENOMEM; > + } > + > + if (unlikely(disable_hugepages)) { > + vfio_lock_acct(1); > + return 1; > + } > + > + /* Lock all the consecutive pages from pfn_base */ > + for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > + unsigned long pfn = 0; > + > + ret = vaddr_get_pfn(vaddr, prot, &pfn); > + if (ret) > + break; > + > + if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > + put_pfn(pfn, prot); > + break; > + } > + > + if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > + put_pfn(pfn, prot); > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > + __func__, limit << PAGE_SHIFT); > + break; > + } > + } > + > + vfio_lock_acct(i); > + > + return i; > +} > + > +long vfio_unpin_pages(unsigned long pfn, long npage, > + int prot, bool do_accounting) > +{ > + unsigned long unlocked = 0; > + long i; > + > + for (i = 0; i < npage; i++) > + unlocked += put_pfn(pfn++, prot); > + > + if (do_accounting) > + vfio_lock_acct(-unlocked); > + > + return unlocked; > +} > diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h > new file mode 100644 > index 0000000..4738391 > --- /dev/null > +++ b/drivers/vfio/vfio_iommu_common.h > @@ -0,0 +1,30 @@ > +/* > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Copyright (C) 2013 Freescale Semiconductor, Inc. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License version 2 as published > + * by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +#ifndef _VFIO_IOMMU_COMMON_H > +#define _VFIO_IOMMU_COMMON_H > + > +void vfio_lock_acct_bg(struct work_struct *work); Does this need to be exposed? > +void vfio_lock_acct(long npage); > +bool is_invalid_reserved_pfn(unsigned long pfn); > +int put_pfn(unsigned long pfn, int prot); Why are these exposed, they only seem to be used by functions in the new common file. > +long vfio_pin_pages(unsigned long vaddr, long npage, int prot, > + unsigned long *pfn_base); > +long vfio_unpin_pages(unsigned long pfn, long npage, > + int prot, bool do_accounting); Can we get by with just these two and vfio_lock_acct()? > +#endif > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index a9807de..e9a58fa 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -37,6 +37,7 @@ > #include > #include > #include > +#include "vfio_iommu_common.h" > > #define DRIVER_VERSION "0.2" > #define DRIVER_AUTHOR "Alex Williamson " > @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts, > MODULE_PARM_DESC(allow_unsafe_interrupts, > "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); > > -static bool disable_hugepages; > -module_param_named(disable_hugepages, > - disable_hugepages, bool, S_IRUGO | S_IWUSR); > -MODULE_PARM_DESC(disable_hugepages, > - "Disable VFIO IOMMU support for IOMMU hugepages."); > - > struct vfio_iommu { > struct iommu_domain *domain; > struct mutex lock; > @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old) > rb_erase(&old->node, &iommu->dma_list); > } > > -struct vwork { > - struct mm_struct *mm; > - long npage; > - struct work_struct work; > -}; > - > -/* delayed decrement/increment for locked_vm */ > -static void vfio_lock_acct_bg(struct work_struct *work) > -{ > - struct vwork *vwork = container_of(work, struct vwork, work); > - struct mm_struct *mm; > - > - mm = vwork->mm; > - down_write(&mm->mmap_sem); > - mm->locked_vm += vwork->npage; > - up_write(&mm->mmap_sem); > - mmput(mm); > - kfree(vwork); > -} > - > -static void vfio_lock_acct(long npage) > -{ > - struct vwork *vwork; > - struct mm_struct *mm; > - > - if (!current->mm || !npage) > - return; /* process exited or nothing to do */ > - > - if (down_write_trylock(¤t->mm->mmap_sem)) { > - current->mm->locked_vm += npage; > - up_write(¤t->mm->mmap_sem); > - return; > - } > - > - /* > - * Couldn't get mmap_sem lock, so must setup to update > - * mm->locked_vm later. If locked_vm were atomic, we > - * wouldn't need this silliness > - */ > - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > - if (!vwork) > - return; > - mm = get_task_mm(current); > - if (!mm) { > - kfree(vwork); > - return; > - } > - INIT_WORK(&vwork->work, vfio_lock_acct_bg); > - vwork->mm = mm; > - vwork->npage = npage; > - schedule_work(&vwork->work); > -} > - > -/* > - * Some mappings aren't backed by a struct page, for example an mmap'd > - * MMIO range for our own or another device. These use a different > - * pfn conversion and shouldn't be tracked as locked pages. > - */ > -static bool is_invalid_reserved_pfn(unsigned long pfn) > -{ > - if (pfn_valid(pfn)) { > - bool reserved; > - struct page *tail = pfn_to_page(pfn); > - struct page *head = compound_trans_head(tail); > - reserved = !!(PageReserved(head)); > - if (head != tail) { > - /* > - * "head" is not a dangling pointer > - * (compound_trans_head takes care of that) > - * but the hugepage may have been split > - * from under us (and we may not hold a > - * reference count on the head page so it can > - * be reused before we run PageReferenced), so > - * we've to check PageTail before returning > - * what we just read. > - */ > - smp_rmb(); > - if (PageTail(tail)) > - return reserved; > - } > - return PageReserved(tail); > - } > - > - return true; > -} > - > -static int put_pfn(unsigned long pfn, int prot) > -{ > - if (!is_invalid_reserved_pfn(pfn)) { > - struct page *page = pfn_to_page(pfn); > - if (prot & IOMMU_WRITE) > - SetPageDirty(page); > - put_page(page); > - return 1; > - } > - return 0; > -} > - > -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) > -{ > - struct page *page[1]; > - struct vm_area_struct *vma; > - int ret = -EFAULT; > - > - if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > - *pfn = page_to_pfn(page[0]); > - return 0; > - } > - > - down_read(¤t->mm->mmap_sem); > - > - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > - > - if (vma && vma->vm_flags & VM_PFNMAP) { > - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > - if (is_invalid_reserved_pfn(*pfn)) > - ret = 0; > - } > - > - up_read(¤t->mm->mmap_sem); > - > - return ret; > -} > - > -/* > - * Attempt to pin pages. We really don't want to track all the pfns and > - * the iommu can only map chunks of consecutive pfns anyway, so get the > - * first page and all consecutive pages with the same locking. > - */ > -static long vfio_pin_pages(unsigned long vaddr, long npage, > - int prot, unsigned long *pfn_base) > -{ > - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - bool lock_cap = capable(CAP_IPC_LOCK); > - long ret, i; > - > - if (!current->mm) > - return -ENODEV; > - > - ret = vaddr_get_pfn(vaddr, prot, pfn_base); > - if (ret) > - return ret; > - > - if (is_invalid_reserved_pfn(*pfn_base)) > - return 1; > - > - if (!lock_cap && current->mm->locked_vm + 1 > limit) { > - put_pfn(*pfn_base, prot); > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > - limit << PAGE_SHIFT); > - return -ENOMEM; > - } > - > - if (unlikely(disable_hugepages)) { > - vfio_lock_acct(1); > - return 1; > - } > - > - /* Lock all the consecutive pages from pfn_base */ > - for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > - unsigned long pfn = 0; > - > - ret = vaddr_get_pfn(vaddr, prot, &pfn); > - if (ret) > - break; > - > - if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > - put_pfn(pfn, prot); > - break; > - } > - > - if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > - put_pfn(pfn, prot); > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > - __func__, limit << PAGE_SHIFT); > - break; > - } > - } > - > - vfio_lock_acct(i); > - > - return i; > -} > - > -static long vfio_unpin_pages(unsigned long pfn, long npage, > - int prot, bool do_accounting) > -{ > - unsigned long unlocked = 0; > - long i; > - > - for (i = 0; i < npage; i++) > - unlocked += put_pfn(pfn++, prot); > - > - if (do_accounting) > - vfio_lock_acct(-unlocked); > - > - return unlocked; > -} > - > static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, > dma_addr_t iova, size_t *size) > { -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/