From: adharmap@codeaurora.org
To: linux-kernel@vger.kernel.org
Cc: linux-arm-msm@vger.kernel.org, dwalker@codeaurora.org,
	Arnd Bergmann, FUJITA Tomonori, Andrew Morton, Ingo Molnar,
	Joerg Roedel, Maciej Sosnowski, Dan Williams, Becky Bruce,
	Benjamin Herrenschmidt, Yang Hongyang, linux-arch@vger.kernel.org,
	Abhijeet Dharmapurikar
Subject: [RFC PATCH] dma: Add barrierless dma mapping/unmapping api
Date: Mon, 25 Jan 2010 18:35:46 -0800
Message-Id: <1264473346-32721-1-git-send-email-adharmap@codeaurora.org>

From: Abhijeet Dharmapurikar

Tests show a significant performance gain when a driver maps and unmaps
multiple buffers without a barrier and issues a single dsb() after the
last buffer.  This API adds support for barrierless DMA operations on
ARMv6 and ARMv7.

Signed-off-by: Abhijeet Dharmapurikar
---
Please refer to the earlier post: http://lkml.org/lkml/2010/1/4/347.

These changes are only for the ARMv6 and ARMv7 architectures and
introduce the dma_map/unmap_single_nobarrier API.  The idea is to
extend the cpu_cache_fns.dma_.*_range functions to accept a third
boolean argument that tells them whether to execute the barrier or
skip it.

Note that this applies on top of Linux 2.6.33-rc5 and needs a few
changes to apply to the linux-next tree.
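For illustration, here is a minimal sketch (not part of the patch) of
the usage pattern this API is meant to enable.  The function and
variable names below are hypothetical; only dma_map_single_nobarrier(),
dma_unmap_single_nobarrier() and the trailing dsb() come from this
patch and the existing ARM headers:

	/*
	 * Hypothetical driver fragment: do the cache maintenance for a
	 * batch of buffers with a single barrier at the end, instead of
	 * one barrier per buffer.
	 */
	#include <linux/dma-mapping.h>
	#include <asm/system.h>		/* dsb() on ARMv6/v7 */

	static void example_map_tx_batch(struct device *dev, void **bufs,
					 dma_addr_t *handles, int count,
					 size_t size)
	{
		int i;

		/* clean the cache for each buffer, skipping the dsb */
		for (i = 0; i < count; i++)
			handles[i] = dma_map_single_nobarrier(dev, bufs[i],
						size, DMA_TO_DEVICE);

		/* one dsb covers all maps; the device now owns the buffers */
		dsb();
	}

	static void example_unmap_rx_batch(struct device *dev,
					   dma_addr_t *handles, int count,
					   size_t size)
	{
		int i;

		for (i = 0; i < count; i++)
			dma_unmap_single_nobarrier(dev, handles[i], size,
						   DMA_FROM_DEVICE);

		/* one barrier before the CPU reads any of the buffers */
		dsb();
	}

Compared with calling dma_map_single() in a loop, this executes one
barrier rather than `count' barriers, which is where the measured gain
comes from.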
 arch/arm/include/asm/cacheflush.h  |   12 ++++----
 arch/arm/include/asm/dma-mapping.h |   57 ++++++++++++++++++++++++++++++++++--
 arch/arm/mm/cache-v6.S             |   15 ++++++----
 arch/arm/mm/cache-v7.S             |   19 +++++++++++-
 arch/arm/mm/dma-mapping.c          |   17 ++++++-----
 drivers/staging/dream/pmem.c       |    4 +-
 6 files changed, 97 insertions(+), 27 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index c77d2fa..59ce7fb 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -227,9 +227,9 @@ struct cpu_cache_fns {
 	void (*coherent_user_range)(unsigned long, unsigned long);
 	void (*flush_kern_dcache_area)(void *, size_t);
 
-	void (*dma_inv_range)(const void *, const void *);
-	void (*dma_clean_range)(const void *, const void *);
-	void (*dma_flush_range)(const void *, const void *);
+	void (*dma_inv_range)(const void *, const void *, unsigned long);
+	void (*dma_clean_range)(const void *, const void *, unsigned long);
+	void (*dma_flush_range)(const void *, const void *, unsigned long);
 };
 
 struct outer_cache_fns {
@@ -288,9 +288,9 @@ extern void __cpuc_flush_dcache_area(void *, size_t);
 #define dmac_clean_range	__glue(_CACHE,_dma_clean_range)
 #define dmac_flush_range	__glue(_CACHE,_dma_flush_range)
 
-extern void dmac_inv_range(const void *, const void *);
-extern void dmac_clean_range(const void *, const void *);
-extern void dmac_flush_range(const void *, const void *);
+extern void dmac_inv_range(const void *, const void *, unsigned long);
+extern void dmac_clean_range(const void *, const void *, unsigned long);
+extern void dmac_flush_range(const void *, const void *, unsigned long);
 
 #endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a96300b..066923c 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -66,7 +66,8 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
  * platforms with CONFIG_DMABOUNCE.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-extern void dma_cache_maint(const void *kaddr, size_t size, int rw);
+extern void dma_cache_maint(const void *kaddr, size_t size, int rw,
+		unsigned long barrier);
 extern void dma_cache_maint_page(struct page *page, unsigned long offset,
 				 size_t size, int rw);
 
@@ -297,6 +298,7 @@ static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
  *
  * The device owns this memory once this call has completed.  The CPU
  * can regain ownership by calling dma_unmap_single() or
+ * dma_unmap_single_nobarrier() followed by dsb()/dmb() or
  * dma_sync_single_for_cpu().
  */
 static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
@@ -305,12 +307,40 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 	BUG_ON(!valid_dma_direction(dir));
 
 	if (!arch_is_coherent())
-		dma_cache_maint(cpu_addr, size, dir);
+		dma_cache_maint(cpu_addr, size, dir, 1);
 
 	return virt_to_dma(dev, cpu_addr);
 }
 
 /**
+ * dma_map_single_nobarrier - map a single buffer for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @cpu_addr: CPU direct mapped address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed and a dsb is
+ * executed.
+ * The CPU can regain ownership by calling dma_unmap_single() or
+ * dma_unmap_single_nobarrier() followed by dsb() or
+ * dma_sync_single_for_cpu().
+ */
+static inline dma_addr_t dma_map_single_nobarrier(struct device *dev,
+		void *cpu_addr, size_t size, enum dma_data_direction dir)
+{
+	BUG_ON(!valid_dma_direction(dir));
+
+	if (!arch_is_coherent())
+		dma_cache_maint(cpu_addr, size, dir, 0);
+
+	return virt_to_dma(dev, cpu_addr);
+}
+
+
+/**
  * dma_map_page - map a portion of a page for streaming DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @page: page that buffer resides in
@@ -356,6 +386,26 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 }
 
 /**
+ * dma_unmap_single_nobarrier - unmap a single buffer previously mapped
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_single)
+ * @dir: DMA transfer direction (same as passed to dma_map_single)
+ *
+ * Unmap a single streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, and a dsb(), reads by the CPU to the buffer are
+ * guaranteed to see whatever the device wrote there.
+ */
+static inline void dma_unmap_single_nobarrier(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	/* nothing to do */
+}
+
+/**
  * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @handle: DMA address of buffer
@@ -413,7 +463,8 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 		return;
 
 	if (!arch_is_coherent())
-		dma_cache_maint(dma_to_virt(dev, handle) + offset, size, dir);
+		dma_cache_maint(dma_to_virt(dev, handle) + offset,
+				size, dir, 1);
 }
 
 static inline void dma_sync_single_for_cpu(struct device *dev,
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 4ba0a24..a88ab56 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -186,7 +186,7 @@ ENTRY(v6_flush_kern_dcache_area)
 
 /*
- *	v6_dma_inv_range(start,end)
+ *	v6_dma_inv_range(start,end,barrier)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
@@ -220,11 +220,12 @@ ENTRY(v6_dma_inv_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
- *	v6_dma_clean_range(start,end)
+ *	v6_dma_clean_range(start,end,barrier)
 *	- start - virtual start address of region
 *	- end   - virtual end address of region
 */
@@ -240,11 +241,12 @@ ENTRY(v6_dma_clean_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 /*
- *	v6_dma_flush_range(start,end)
+ *	v6_dma_flush_range(start,end,barrier)
 *	- start - virtual start address of region
 *	- end   - virtual end address of region
 */
@@ -260,7 +262,8 @@ ENTRY(v6_dma_flush_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
-	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	cmp	r2, #1
+	mcreq	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	mov	pc, lr
 
 __INITDATA
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 9073db8..147a325 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -217,6 +217,7 @@ ENDPROC(v7_flush_kern_dcache_area)
 *	- end   - virtual end address of region
 */
 ENTRY(v7_dma_inv_range)
+	str	r2, [sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	tst	r0, r3
@@ -231,16 +232,21 @@ ENTRY(v7_dma_inv_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr	r2, [sp], #4
+	cmp	r2, #1
+	bne	2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_inv_range)
 
 /*
- *	v7_dma_clean_range(start,end)
+ *	v7_dma_clean_range(start,end,barrier)
 *	- start - virtual start address of region
 *	- end   - virtual end address of region
 */
 ENTRY(v7_dma_clean_range)
+	str	r2, [sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r0, r0, r3
@@ -249,16 +255,21 @@ ENTRY(v7_dma_clean_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr	r2, [sp], #4
+	cmp	r2, #1
+	bne	2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_clean_range)
 
 /*
- *	v7_dma_flush_range(start,end)
+ *	v7_dma_flush_range(start,end,barrier)
 *	- start - virtual start address of region
 *	- end   - virtual end address of region
 */
 ENTRY(v7_dma_flush_range)
+	str	r2, [sp, #-4]!
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r0, r0, r3
@@ -267,7 +278,11 @@ ENTRY(v7_dma_flush_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
+	ldr	r2, [sp], #4
+	cmp	r2, #1
+	bne	2f
 	dsb
+2:
 	mov	pc, lr
 ENDPROC(v7_dma_flush_range)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 26325cb..c9e3124 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -106,7 +106,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	 */
 	ptr = page_address(page);
 	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
+	dmac_flush_range(ptr, ptr + size, 1);
 	outer_flush_range(__pa(ptr), __pa(ptr) + size);
 
 	return page;
@@ -404,10 +404,11 @@ EXPORT_SYMBOL(dma_free_coherent);
  * platforms with CONFIG_DMABOUNCE.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-void dma_cache_maint(const void *start, size_t size, int direction)
+void dma_cache_maint(const void *start, size_t size, int direction,
+		unsigned long barrier)
 {
-	void (*inner_op)(const void *, const void *);
+	void (*inner_op)(const void *, const void *, unsigned long);
 	void (*outer_op)(unsigned long, unsigned long);
 
 	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));
 
@@ -428,7 +429,7 @@ void dma_cache_maint(const void *start, size_t size, int direction)
 		BUG();
 	}
 
-	inner_op(start, start + size);
+	inner_op(start, start + size, barrier);
 	outer_op(__pa(start), __pa(start) + size);
 }
 EXPORT_SYMBOL(dma_cache_maint);
@@ -438,7 +439,7 @@ static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
 {
 	void *vaddr;
 	unsigned long paddr;
-	void (*inner_op)(const void *, const void *);
+	void (*inner_op)(const void *, const void *, unsigned long);
 	void (*outer_op)(unsigned long, unsigned long);
 
 	switch (direction) {
@@ -460,12 +461,12 @@ static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
 
 	if (!PageHighMem(page)) {
 		vaddr = page_address(page) + offset;
-		inner_op(vaddr, vaddr + size);
+		inner_op(vaddr, vaddr + size, 1);
 	} else {
 		vaddr = kmap_high_get(page);
 		if (vaddr) {
 			vaddr += offset;
-			inner_op(vaddr, vaddr + size);
+			inner_op(vaddr, vaddr + size, 1);
 			kunmap_high(page);
 		}
 	}
diff --git a/drivers/staging/dream/pmem.c b/drivers/staging/dream/pmem.c
index def6468..1b24945 100644
--- a/drivers/staging/dream/pmem.c
+++ b/drivers/staging/dream/pmem.c
@@ -802,7 +802,7 @@ void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len)
 	vaddr = pmem_start_vaddr(id, data);
 	/* if this isn't a submmapped file, flush the whole thing */
 	if (unlikely(!(data->flags & PMEM_FLAGS_CONNECTED))) {
-		dmac_flush_range(vaddr, vaddr + pmem_len(id, data));
+		dmac_flush_range(vaddr, vaddr + pmem_len(id, data), 1);
 		goto end;
 	}
 	/* otherwise, flush the region of the file we are drawing */
@@ -813,7 +813,7 @@ void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len)
 			  region_node->region.len))) {
 			flush_start = vaddr + region_node->region.offset;
 			flush_end = flush_start + region_node->region.len;
-			dmac_flush_range(flush_start, flush_end);
+			dmac_flush_range(flush_start, flush_end, 1);
 			break;
 		}
 	}
-- 
1.5.6.3