Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752237AbbKKIII (ORCPT );
	Wed, 11 Nov 2015 03:08:08 -0500
Received: from hqemgate16.nvidia.com ([216.228.121.65]:5872 "EHLO
	hqemgate16.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1751890AbbKKIIG (ORCPT );
	Wed, 11 Nov 2015 03:08:06 -0500
X-PGP-Universal: processed;
	by hqnvupgp08.nvidia.com on Wed, 11 Nov 2015 00:05:50 -0800
From: Alexandre Courbot 
To: Ben Skeggs , Dave Airlie 
CC: Andrew Morton , Guenter Roeck ,
	nouveau@lists.freedesktop.org, linux-kernel@vger.kernel.org,
	gnurou@gmail.com, Alexandre Courbot 
Subject: [PATCH] instmem/gk20a: use DMA API CPU mapping
Date: Wed, 11 Nov 2015 17:07:51 +0900
Message-ID: <1447229271-22901-1-git-send-email-acourbot@nvidia.com>
X-Mailer: git-send-email 2.6.2
X-NVConfidentiality: public
MIME-Version: 1.0
Content-Type: text/plain
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org
Content-Length: 11503
Lines: 357

Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access")
tried to be smart about using the DMA API by managing the CPU mappings
of DMA-allocated buffers itself. In doing so, it relied on
dma_to_phys(), an architecture-private function that is not available
everywhere, which broke the build on several architectures.

Since there is no reliable and portable way to obtain the physical
address of a DMA API buffer, stop trying to be smart and simply use the
CPU mapping that the DMA API can provide. This means that buffers will
be CPU-mapped for their whole life instead of only while we need them,
but the DMA API path is only a fallback for when no IOMMU is available,
so we should not expect optimal behavior there anyway.

This makes the IOMMU and DMA API implementations of instmem diverge
enough that we should maybe put them into separate files...

Signed-off-by: Alexandre Courbot 
---
Ben/Dave, since Dave's fix has reached mainline and builds are not
broken anymore, we can proceed in one of two ways:
1) Ben merges this for 4.4 and lets it flow in for -rc2
2) I send another fix against the kernel tree
Which one shall I do?
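
For reference, here is a minimal sketch of the two approaches
(illustration only, not part of the patch; the "buf" structure and
helpers below are made up). The old code allocated with
DMA_ATTR_NO_KERNEL_MAPPING and later rebuilt a CPU mapping on demand via
dma_to_phys() + pfn_to_page() + vmap(), while this patch simply keeps
the kernel mapping that dma_alloc_attrs() already returns when
DMA_ATTR_NO_KERNEL_MAPPING is not set:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* illustration only -- hypothetical helpers, not part of the patch */
struct buf {
	void *vaddr;		/* CPU mapping returned by the DMA API */
	dma_addr_t handle;	/* device address */
};

static int buf_alloc(struct device *dev, struct buf *buf, size_t size,
		     struct dma_attrs *attrs)
{
	/*
	 * Without DMA_ATTR_NO_KERNEL_MAPPING the DMA API returns a usable
	 * CPU pointer, so no dma_to_phys()/vmap() dance is needed; the
	 * mapping simply lives as long as the buffer does.
	 */
	buf->vaddr = dma_alloc_attrs(dev, size, &buf->handle, GFP_KERNEL,
				     attrs);
	return buf->vaddr ? 0 : -ENOMEM;
}

static void buf_free(struct device *dev, struct buf *buf, size_t size,
		     struct dma_attrs *attrs)
{
	dma_free_attrs(dev, size, buf->vaddr, buf->handle, attrs);
}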

 drm/nouveau/nvkm/subdev/instmem/gk20a.c | 148 +++++++++++++-------------------
 lib/include/nvif/os.h                   |   6 --
 2 files changed, 62 insertions(+), 92 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 681b2541229a..4c20fec64d96 100644
--- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -56,9 +56,6 @@ struct gk20a_instobj {
 
 	/* CPU mapping */
 	u32 *vaddr;
-	struct list_head vaddr_node;
-	/* How many clients are using vaddr? */
-	u32 use_cpt;
 };
 
 #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
@@ -68,7 +65,6 @@ struct gk20a_instobj {
 struct gk20a_instobj_dma {
 	struct gk20a_instobj base;
 
-	u32 *cpuaddr;
 	dma_addr_t handle;
 	struct nvkm_mm_node r;
 };
@@ -81,6 +77,11 @@ struct gk20a_instobj_dma {
 struct gk20a_instobj_iommu {
 	struct gk20a_instobj base;
 
+	/* to link into gk20a_instmem::vaddr_lru */
+	struct list_head vaddr_node;
+	/* how many clients are using vaddr? */
+	u32 use_cpt;
+
 	/* will point to the higher half of pages */
 	dma_addr_t *dma_addrs;
 	/* array of base.mem->size pages (+ dma_addr_ts) */
@@ -109,8 +110,6 @@ struct gk20a_instmem {
 
 	/* Only used by DMA API */
 	struct dma_attrs attrs;
-
-	void __iomem * (*cpu_map)(struct nvkm_memory *);
 };
 
 #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
@@ -132,46 +131,19 @@ gk20a_instobj_size(struct nvkm_memory *memory)
 	return (u64)gk20a_instobj(memory)->mem.size << 12;
 }
 
-static void __iomem *
-gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
-{
-	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
-	struct device *dev = node->base.imem->base.subdev.device->dev;
-	int npages = nvkm_memory_size(memory) >> 12;
-	struct page *pages[npages];
-	int i;
-
-	/* phys_to_page does not exist on all platforms... */
-	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
-	for (i = 1; i < npages; i++)
-		pages[i] = pages[0] + i;
-
-	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-}
-
-static void __iomem *
-gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
-{
-	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
-	int npages = nvkm_memory_size(memory) >> 12;
-
-	return vmap(node->pages, npages, VM_MAP,
-		    pgprot_writecombine(PAGE_KERNEL));
-}
-
 /*
  * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
  */
 static void
-gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj)
+gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
 {
-	struct gk20a_instmem *imem = obj->imem;
+	struct gk20a_instmem *imem = obj->base.imem;
 	/* there should not be any user left... */
 	WARN_ON(obj->use_cpt);
 	list_del(&obj->vaddr_node);
-	vunmap(obj->vaddr);
-	obj->vaddr = NULL;
-	imem->vaddr_use -= nvkm_memory_size(&obj->memory);
+	vunmap(obj->base.vaddr);
+	obj->base.vaddr = NULL;
+	imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
 	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
 		   imem->vaddr_use, imem->vaddr_max);
 }
@@ -187,17 +159,30 @@ gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
 		if (list_empty(&imem->vaddr_lru))
 			break;
 
-		gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru,
-				struct gk20a_instobj, vaddr_node));
+		gk20a_instobj_iommu_recycle_vaddr(
+				list_first_entry(&imem->vaddr_lru,
+				struct gk20a_instobj_iommu, vaddr_node));
 	}
 }
 
 static void __iomem *
-gk20a_instobj_acquire(struct nvkm_memory *memory)
+gk20a_instobj_acquire_dma(struct nvkm_memory *memory)
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
 	struct gk20a_instmem *imem = node->imem;
 	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+
+	nvkm_ltc_flush(ltc);
+
+	return node->vaddr;
+}
+
+static void __iomem *
+gk20a_instobj_acquire_iommu(struct nvkm_memory *memory)
+{
+	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+	struct gk20a_instmem *imem = node->base.imem;
+	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
 	const u64 size = nvkm_memory_size(memory);
 	unsigned long flags;
 
@@ -205,7 +190,7 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
 
 	spin_lock_irqsave(&imem->lock, flags);
 
-	if (node->vaddr) {
+	if (node->base.vaddr) {
 		if (!node->use_cpt) {
 			/* remove from LRU list since mapping in use again */
 			list_del(&node->vaddr_node);
@@ -216,9 +201,10 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
 	/* try to free some address space if we reached the limit */
 	gk20a_instmem_vaddr_gc(imem, size);
 
-	node->vaddr = imem->cpu_map(memory);
-
-	if (!node->vaddr) {
+	/* map the pages */
+	node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP,
+				pgprot_writecombine(PAGE_KERNEL));
+	if (!node->base.vaddr) {
 		nvkm_error(&imem->base.subdev, "cannot map instobj - "
 			   "this is not going to end well...\n");
 		goto out;
@@ -232,15 +218,25 @@ out:
 	node->use_cpt++;
 	spin_unlock_irqrestore(&imem->lock, flags);
 
-	return node->vaddr;
+	return node->base.vaddr;
 }
 
 static void
-gk20a_instobj_release(struct nvkm_memory *memory)
+gk20a_instobj_release_dma(struct nvkm_memory *memory)
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
 	struct gk20a_instmem *imem = node->imem;
 	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+
+	nvkm_ltc_invalidate(ltc);
+}
+
+static void
+gk20a_instobj_release_iommu(struct nvkm_memory *memory)
+{
+	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+	struct gk20a_instmem *imem = node->base.imem;
+	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&imem->lock, flags);
@@ -284,27 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
 	nvkm_vm_map_at(vma, offset, &node->mem);
 }
 
-/*
- * Clear the CPU mapping of an instobj if it exists
- */
-static void
-gk20a_instobj_dtor(struct gk20a_instobj *node)
-{
-	struct gk20a_instmem *imem = node->imem;
-	unsigned long flags;
-
-	spin_lock_irqsave(&imem->lock, flags);
-
-	/* vaddr has already been recycled */
-	if (!node->vaddr)
-		goto out;
-
-	gk20a_instobj_recycle_vaddr(node);
-
-out:
-	spin_unlock_irqrestore(&imem->lock, flags);
-}
-
 static void *
 gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
 {
@@ -312,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
 	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
 
-	gk20a_instobj_dtor(&node->base);
-
-	if (unlikely(!node->cpuaddr))
+	if (unlikely(!node->base.vaddr))
 		goto out;
 
-	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
+	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr,
 		       node->handle, &imem->attrs);
 
 out:
@@ -331,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
 	struct nvkm_mm_node *r;
+	unsigned long flags;
 	int i;
 
-	gk20a_instobj_dtor(&node->base);
-
 	if (unlikely(list_empty(&node->base.mem.regions)))
 		goto out;
 
+	spin_lock_irqsave(&imem->lock, flags);
+
+	/* vaddr has already been recycled */
+	if (node->base.vaddr)
+		gk20a_instobj_iommu_recycle_vaddr(node);
+
+	spin_unlock_irqrestore(&imem->lock, flags);
+
 	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
 			     rl_entry);
 
@@ -368,8 +348,8 @@ gk20a_instobj_func_dma = {
 	.target = gk20a_instobj_target,
 	.addr = gk20a_instobj_addr,
 	.size = gk20a_instobj_size,
-	.acquire = gk20a_instobj_acquire,
-	.release = gk20a_instobj_release,
+	.acquire = gk20a_instobj_acquire_dma,
+	.release = gk20a_instobj_release_dma,
 	.rd32 = gk20a_instobj_rd32,
 	.wr32 = gk20a_instobj_wr32,
 	.map = gk20a_instobj_map,
@@ -381,8 +361,8 @@ gk20a_instobj_func_iommu = {
 	.target = gk20a_instobj_target,
 	.addr = gk20a_instobj_addr,
 	.size = gk20a_instobj_size,
-	.acquire = gk20a_instobj_acquire,
-	.release = gk20a_instobj_release,
+	.acquire = gk20a_instobj_acquire_iommu,
+	.release = gk20a_instobj_release_iommu,
 	.rd32 = gk20a_instobj_rd32,
 	.wr32 = gk20a_instobj_wr32,
 	.map = gk20a_instobj_map,
@@ -402,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 
 	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
 
-	node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
-					&node->handle, GFP_KERNEL,
-					&imem->attrs);
-	if (!node->cpuaddr) {
+	node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
+					   &node->handle, GFP_KERNEL,
+					   &imem->attrs);
+	if (!node->base.vaddr) {
 		nvkm_error(subdev, "cannot allocate DMA memory\n");
 		return -ENOMEM;
 	}
@@ -611,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
 		imem->mm = &tdev->iommu.mm;
 		imem->domain = tdev->iommu.domain;
 		imem->iommu_pgshift = tdev->iommu.pgshift;
-		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
 		imem->iommu_bit = tdev->func->iommu_bit;
 
 		nvkm_info(&imem->base.subdev, "using IOMMU\n");
 	} else {
 		init_dma_attrs(&imem->attrs);
-		/* We will access the memory through our own mapping */
 		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
 		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
 		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
-		imem->cpu_map = gk20a_instobj_cpu_map_dma;
 
 		nvkm_info(&imem->base.subdev, "using DMA API\n");
 	}
diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
index 2df30489179a..e8c06cb254dc 100644
--- a/lib/include/nvif/os.h
+++ b/lib/include/nvif/os.h
@@ -943,12 +943,6 @@ dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
 {
 }
 
-static inline phys_addr_t
-dma_to_phys(struct device *dev, dma_addr_t addr)
-{
-	return 0;
-}
-
 /******************************************************************************
  * PCI
  *****************************************************************************/
-- 
2.6.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/