Hi all,
this series moves the existing arm64 implementation of arch_dma_alloc and
arch_dma_free to common code given that it is not arm64-specific, and
then also uses it for csky. Given how many architectures remap memory
for the DMA coherent implementation it should be usable for many more,
and the new cache flushing hook and the generic atomic pool are also
enablers for implementing the IOMMU API dma ops in common code in a
follow-on series.
Some architectures support remapping highmem into DMA coherent
allocations. To use the common code for them we need variants of
dma_direct_{alloc,free}_pages that do not use kernel virtual addresses.
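As a rough sketch (not part of the patch itself) of how a remapping
allocator is expected to consume these page-based helpers: my_remap_alloc
is a hypothetical name, arch_dma_prep_coherent is only introduced later in
this series, and the pgprot used for the uncached mapping is an
architecture choice, so take this as an illustration only:

	void *my_remap_alloc(struct device *dev, size_t size,
			dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
	{
		struct page *page;
		void *ret;

		page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
		if (!page)
			return NULL;

		/* flush the cacheable kernel alias, then map it uncached */
		arch_dma_prep_coherent(page, size);
		ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
				pgprot_noncached(PAGE_KERNEL),
				__builtin_return_address(0));
		if (!ret)
			__dma_direct_free_pages(dev, size, page);
		return ret;
	}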
Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/dma-direct.h | 3 +++
kernel/dma/direct.c | 32 ++++++++++++++++++++++----------
2 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bd73e7a91410..5a7a3bbb912f 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -67,6 +67,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page);
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 22a12ab5a5e9..680287779b0a 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -103,14 +103,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
u64 phys_mask;
- void *ret;
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -150,11 +149,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}
}
+ return page;
+}
+
+void *dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+{
+ struct page *page;
+ void *ret;
+
+ page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
if (!page)
return NULL;
+
ret = page_address(page);
if (force_dma_unencrypted()) {
- set_memory_decrypted((unsigned long)ret, 1 << page_order);
+ set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
*dma_handle = __phys_to_dma(dev, page_to_phys(page));
} else {
*dma_handle = phys_to_dma(dev, page_to_phys(page));
@@ -163,20 +173,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
return ret;
}
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
+{
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (!dma_release_from_contiguous(dev, page, count))
+ __free_pages(page, get_order(size));
+}
+
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned int page_order = get_order(size);
if (force_dma_unencrypted())
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
- if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
- free_pages((unsigned long)cpu_addr, page_order);
+ __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}
void *dma_direct_alloc(struct device *dev, size_t size,
--
2.19.1
dma_alloc_from_contiguous can return highmem pages depending on the
setup, which a plain non-remapping DMA allocator can't handle. Detect
this case and try the normal page allocator instead.
Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/dma/direct.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 680287779b0a..c49849bcced6 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -162,6 +162,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
if (!page)
return NULL;
+ if (PageHighMem(page)) {
+ /*
+ * Depending on the cma= arguments and per-arch setup
+ * dma_alloc_from_contiguous could return highmem pages.
+ * Without remapping there is no way to return them here,
+ * so log an error and fail.
+ */
+ dev_info(dev, "Rejecting highmem page from CMA.\n");
+ __dma_direct_free_pages(dev, size, page);
+ return NULL;
+ }
+
ret = page_address(page);
if (force_dma_unencrypted()) {
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
--
2.19.1
The dma remap code only really makes sense for non-cache-coherent
architectures, and currently is only used by arm, arm64 and xtensa.
Split it out into a separate file with a separate Kconfig symbol,
which gets the right copyright notice given that this code was
written by Laura Abbott working for Code Aurora at that point.
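For reference, a minimal sketch of how the moved helpers are used by a
caller such as the arm64 code later in this series (page, size and the
pgprot are whatever the caller picked):

	/* map a contiguous allocation with an uncached/writecombine pgprot */
	vaddr = dma_common_contiguous_remap(page, size, VM_USERMAP,
			pgprot_writecombine(PAGE_KERNEL),
			__builtin_return_address(0));

	/* ... use vaddr ... */

	/* and tear the mapping down again on free */
	dma_common_free_remap(vaddr, size, VM_USERMAP);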
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Laura Abbott <[email protected]>
---
arch/arm/Kconfig | 1 +
arch/arm64/Kconfig | 1 +
arch/csky/Kconfig | 1 +
arch/xtensa/Kconfig | 1 +
kernel/dma/Kconfig | 4 ++
kernel/dma/Makefile | 2 +-
kernel/dma/mapping.c | 84 ------------------------------------------
kernel/dma/remap.c | 88 ++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 97 insertions(+), 85 deletions(-)
create mode 100644 kernel/dma/remap.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 91be74d8df65..3b2852df6eb3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -30,6 +30,7 @@ config ARM
select CPU_PM if (SUSPEND || CPU_IDLE)
select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
select DMA_DIRECT_OPS if !MMU
+ select DMA_REMAP if MMU
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select GENERIC_ALLOCATOR
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787d7850e064..5d065acb6d10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -82,6 +82,7 @@ config ARM64
select CRC32
select DCACHE_WORD_ACCESS
select DMA_DIRECT_OPS
+ select DMA_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cb64f8dacd08..8a30e006a845 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -9,6 +9,7 @@ config CSKY
select CLKSRC_OF
select DMA_DIRECT_OPS
select DMA_NONCOHERENT_OPS
+ select DMA_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index d29b7365da8d..239bfb16c58b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -11,6 +11,7 @@ config XTENSA
select CLONE_BACKWARDS
select COMMON_CLK
select DMA_DIRECT_OPS
+ select DMA_REMAP if MMU
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_SHOW
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 645c7a2ecde8..c92e08173ed8 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -51,3 +51,7 @@ config SWIOTLB
bool
select DMA_DIRECT_OPS
select NEED_DMA_MAP_STATE
+
+config DMA_REMAP
+ depends on MMU
+ bool
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 7d581e4eea4a..f4feeceb8020 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-
+obj-$(CONFIG_DMA_REMAP) += remap.o
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58dec7a92b7b..dfbc3deb95cd 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -262,87 +262,3 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
}
EXPORT_SYMBOL(dma_common_mmap);
-
-#ifdef CONFIG_MMU
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
- size_t size, unsigned long vm_flags, pgprot_t prot,
- const void *caller)
-{
- struct vm_struct *area;
-
- area = get_vm_area_caller(size, vm_flags, caller);
- if (!area)
- return NULL;
-
- if (map_vm_area(area, prot, pages)) {
- vunmap(area->addr);
- return NULL;
- }
-
- return area;
-}
-
-/*
- * remaps an array of PAGE_SIZE pages into another vm_area
- * Cannot be used in non-sleeping contexts
- */
-void *dma_common_pages_remap(struct page **pages, size_t size,
- unsigned long vm_flags, pgprot_t prot,
- const void *caller)
-{
- struct vm_struct *area;
-
- area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
- if (!area)
- return NULL;
-
- area->pages = pages;
-
- return area->addr;
-}
-
-/*
- * remaps an allocated contiguous region into another vm_area.
- * Cannot be used in non-sleeping contexts
- */
-
-void *dma_common_contiguous_remap(struct page *page, size_t size,
- unsigned long vm_flags,
- pgprot_t prot, const void *caller)
-{
- int i;
- struct page **pages;
- struct vm_struct *area;
-
- pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
- if (!pages)
- return NULL;
-
- for (i = 0; i < (size >> PAGE_SHIFT); i++)
- pages[i] = nth_page(page, i);
-
- area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
-
- kfree(pages);
-
- if (!area)
- return NULL;
- return area->addr;
-}
-
-/*
- * unmaps a range previously mapped by dma_common_*_remap
- */
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
-{
- struct vm_struct *area = find_vm_area(cpu_addr);
-
- if (!area || (area->flags & vm_flags) != vm_flags) {
- WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
- return;
- }
-
- unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
- vunmap(cpu_addr);
-}
-#endif
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
new file mode 100644
index 000000000000..456f7cc3414d
--- /dev/null
+++ b/kernel/dma/remap.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2014 The Linux Foundation
+ */
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+static struct vm_struct *__dma_common_pages_remap(struct page **pages,
+ size_t size, unsigned long vm_flags, pgprot_t prot,
+ const void *caller)
+{
+ struct vm_struct *area;
+
+ area = get_vm_area_caller(size, vm_flags, caller);
+ if (!area)
+ return NULL;
+
+ if (map_vm_area(area, prot, pages)) {
+ vunmap(area->addr);
+ return NULL;
+ }
+
+ return area;
+}
+
+/*
+ * remaps an array of PAGE_SIZE pages into another vm_area
+ * Cannot be used in non-sleeping contexts
+ */
+void *dma_common_pages_remap(struct page **pages, size_t size,
+ unsigned long vm_flags, pgprot_t prot,
+ const void *caller)
+{
+ struct vm_struct *area;
+
+ area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+ if (!area)
+ return NULL;
+
+ area->pages = pages;
+
+ return area->addr;
+}
+
+/*
+ * Remaps an allocated contiguous region into another vm_area.
+ * Cannot be used in non-sleeping contexts
+ */
+void *dma_common_contiguous_remap(struct page *page, size_t size,
+ unsigned long vm_flags,
+ pgprot_t prot, const void *caller)
+{
+ int i;
+ struct page **pages;
+ struct vm_struct *area;
+
+ pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+
+ for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ pages[i] = nth_page(page, i);
+
+ area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+
+ kfree(pages);
+
+ if (!area)
+ return NULL;
+ return area->addr;
+}
+
+/*
+ * Unmaps a range previously mapped by dma_common_*_remap
+ */
+void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+{
+ struct vm_struct *area = find_vm_area(cpu_addr);
+
+ if (!area || (area->flags & vm_flags) != vm_flags) {
+ WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+ return;
+ }
+
+ unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
+ vunmap(cpu_addr);
+}
--
2.19.1
The arm64 codebase to implement coherent dma allocation for architectures
with non-coherent DMA is a good start for a generic implementation, given
that it uses the generic remap helpers, provides the atomic pool for
allocations that can't sleep and still is relatively simple and well
tested. Move it to kernel/dma and allow architectures to opt into it
using a config symbol. Architectures just need to provide a new
arch_dma_prep_coherent helper to write back and invalidate the caches
for any memory that gets remapped for uncached access.
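To make the opt-in concrete, this is roughly all an architecture has to
provide (a sketch modelled on the arm64 and csky conversions in this
series; my_arch_flush_area stands in for whatever writeback+invalidate
primitive the architecture already has, and the gfp/pgprot arguments are
per-architecture choices):

	/* Kconfig: select DMA_DIRECT_OPS and DMA_DIRECT_REMAP */

	void arch_dma_prep_coherent(struct page *page, size_t size)
	{
		/* write back and invalidate the kernel alias */
		my_arch_flush_area(page_address(page), size);
	}

	static int __init my_arch_dma_init(void)
	{
		return dma_atomic_pool_init(GFP_KERNEL,
				pgprot_noncached(PAGE_KERNEL));
	}
	arch_initcall(my_arch_dma_init);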
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm64/Kconfig | 2 +-
arch/arm64/mm/dma-mapping.c | 184 ++------------------------------
include/linux/dma-mapping.h | 5 +
include/linux/dma-noncoherent.h | 2 +
kernel/dma/Kconfig | 6 ++
kernel/dma/remap.c | 158 ++++++++++++++++++++++++++-
6 files changed, 181 insertions(+), 176 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d065acb6d10..2e645ea693ea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -82,7 +82,7 @@ config ARM64
select CRC32
select DCACHE_WORD_ACCESS
select DMA_DIRECT_OPS
- select DMA_REMAP
+ select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a3ac26284845..e2e7e5d0f94e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,113 +33,6 @@
#include <asm/cacheflush.h>
-static struct gen_pool *atomic_pool __ro_after_init;
-
-#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
-
-static int __init early_coherent_pool(char *p)
-{
- atomic_pool_size = memparse(p, &p);
- return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
-static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
-{
- unsigned long val;
- void *ptr = NULL;
-
- if (!atomic_pool) {
- WARN(1, "coherent pool not initialised!\n");
- return NULL;
- }
-
- val = gen_pool_alloc(atomic_pool, size);
- if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
-
- *ret_page = phys_to_page(phys);
- ptr = (void *)val;
- memset(ptr, 0, size);
- }
-
- return ptr;
-}
-
-static bool __in_atomic_pool(void *start, size_t size)
-{
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
-}
-
-static int __free_from_pool(void *start, size_t size)
-{
- if (!__in_atomic_pool(start, size))
- return 0;
-
- gen_pool_free(atomic_pool, (unsigned long)start, size);
-
- return 1;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t flags, unsigned long attrs)
-{
- struct page *page;
- void *ptr, *coherent_ptr;
- pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
-
- size = PAGE_ALIGN(size);
-
- if (!gfpflags_allow_blocking(flags)) {
- struct page *page = NULL;
- void *addr = __alloc_from_pool(size, &page, flags);
-
- if (addr)
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
- return addr;
- }
-
- ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
- if (!ptr)
- goto no_mem;
-
- /* remove any dirty cache lines on the kernel alias */
- __dma_flush_area(ptr, size);
-
- /* create a coherent mapping */
- page = virt_to_page(ptr);
- coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- prot, __builtin_return_address(0));
- if (!coherent_ptr)
- goto no_map;
-
- return coherent_ptr;
-
-no_map:
- dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
-no_mem:
- return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
- void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
-
- vunmap(vaddr);
- dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
- }
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
- dma_addr_t dma_addr)
-{
- return __phys_to_pfn(dma_to_phys(dev, dma_addr));
-}
-
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
@@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_unmap_area(phys_to_virt(paddr), size, dir);
}
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+ __dma_flush_area(page_address(page), size);
+}
+
#ifdef CONFIG_IOMMU_DMA
static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
struct page *page, size_t size)
@@ -191,67 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
}
#endif /* CONFIG_IOMMU_DMA */
-static int __init atomic_pool_init(void)
-{
- pgprot_t prot = __pgprot(PROT_NORMAL_NC);
- unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
- struct page *page;
- void *addr;
- unsigned int pool_size_order = get_order(atomic_pool_size);
-
- if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order, false);
- else
- page = alloc_pages(GFP_DMA32, pool_size_order);
-
- if (page) {
- int ret;
- void *page_addr = page_address(page);
-
- memset(page_addr, 0, atomic_pool_size);
- __dma_flush_area(page_addr, atomic_pool_size);
-
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
- goto free_page;
-
- addr = dma_common_contiguous_remap(page, atomic_pool_size,
- VM_USERMAP, prot, atomic_pool_init);
-
- if (!addr)
- goto destroy_genpool;
-
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
- page_to_phys(page),
- atomic_pool_size, -1);
- if (ret)
- goto remove_mapping;
-
- gen_pool_set_algo(atomic_pool,
- gen_pool_first_fit_order_align,
- NULL);
-
- pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
- atomic_pool_size / 1024);
- return 0;
- }
- goto out;
-
-remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
-destroy_genpool:
- gen_pool_destroy(atomic_pool);
- atomic_pool = NULL;
-free_page:
- if (!dma_release_from_contiguous(NULL, page, nr_pages))
- __free_pages(page, pool_size_order);
-out:
- pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
- atomic_pool_size / 1024);
- return -ENOMEM;
-}
-
/********************************************
* The following APIs are for dummy DMA ops *
********************************************/
@@ -350,8 +187,7 @@ static int __init arm64_dma_init(void)
TAINT_CPU_OUT_OF_SPEC,
"ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
ARCH_DMA_MINALIGN, cache_line_size());
-
- return atomic_pool_init();
+ return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
@@ -397,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
page = alloc_pages(gfp, get_order(size));
addr = page ? page_address(page) : NULL;
} else {
- addr = __alloc_from_pool(size, &page, gfp);
+ addr = dma_alloc_from_pool(size, &page, gfp);
}
if (!addr)
return NULL;
@@ -407,7 +243,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
if (coherent)
__free_pages(page, get_order(size));
else
- __free_from_pool(addr, size);
+ dma_free_from_pool(addr, size);
addr = NULL;
}
} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
@@ -471,9 +307,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
* coherent devices.
* Hence how dodgy the below logic looks...
*/
- if (__in_atomic_pool(cpu_addr, size)) {
+ if (dma_in_atomic_pool(cpu_addr, size)) {
iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
- __free_from_pool(cpu_addr, size);
+ dma_free_from_pool(cpu_addr, size);
} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
struct page *page = vmalloc_to_page(cpu_addr);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 15bd41447025..56ed94b99963 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -455,6 +455,11 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
+int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
+bool dma_in_atomic_pool(void *start, size_t size);
+void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
+bool dma_free_from_pool(void *start, size_t size);
+
/**
* dma_mmap_attrs - map a coherent DMA allocation into user space
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 9051b055beec..306557331d7d 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -69,4 +69,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev)
}
#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
+void arch_dma_prep_coherent(struct page *page, size_t size);
+
#endif /* _LINUX_DMA_NONCOHERENT_H */
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c92e08173ed8..fb045ebb0713 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -55,3 +55,9 @@ config SWIOTLB
config DMA_REMAP
depends on MMU
bool
+
+config DMA_DIRECT_REMAP
+ bool
+ depends on DMA_DIRECT_OPS
+ select DMA_REMAP
+
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 456f7cc3414d..bc42766f52df 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -1,8 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2012 ARM Ltd.
* Copyright (c) 2014 The Linux Foundation
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/dma-contiguous.h>
+#include <linux/init.h>
+#include <linux/genalloc.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -86,3 +91,154 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
vunmap(cpu_addr);
}
+
+#ifdef CONFIG_DMA_DIRECT_REMAP
+static struct gen_pool *atomic_pool __ro_after_init;
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+ atomic_pool_size = memparse(p, &p);
+ return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+{
+ unsigned int pool_size_order = get_order(atomic_pool_size);
+ unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+ struct page *page;
+ void *addr;
+ int ret;
+
+ if (dev_get_cma_area(NULL))
+ page = dma_alloc_from_contiguous(NULL, nr_pages,
+ pool_size_order, false);
+ else
+ page = alloc_pages(gfp, pool_size_order);
+ if (!page)
+ goto out;
+
+ memset(page_address(page), 0, atomic_pool_size);
+ arch_dma_prep_coherent(page, atomic_pool_size);
+
+ atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!atomic_pool)
+ goto free_page;
+
+ addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
+ prot, __builtin_return_address(0));
+ if (!addr)
+ goto destroy_genpool;
+
+ ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
+ page_to_phys(page), atomic_pool_size, -1);
+ if (ret)
+ goto remove_mapping;
+ gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
+
+ pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
+ atomic_pool_size / 1024);
+ return 0;
+
+remove_mapping:
+ dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+destroy_genpool:
+ gen_pool_destroy(atomic_pool);
+ atomic_pool = NULL;
+free_page:
+ if (!dma_release_from_contiguous(NULL, page, nr_pages))
+ __free_pages(page, pool_size_order);
+out:
+ pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+ atomic_pool_size / 1024);
+ return -ENOMEM;
+}
+
+bool dma_in_atomic_pool(void *start, size_t size)
+{
+ return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+}
+
+void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+{
+ unsigned long val;
+ void *ptr = NULL;
+
+ if (!atomic_pool) {
+ WARN(1, "coherent pool not initialised!\n");
+ return NULL;
+ }
+
+ val = gen_pool_alloc(atomic_pool, size);
+ if (val) {
+ phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+ *ret_page = phys_to_page(phys);
+ ptr = (void *)val;
+ memset(ptr, 0, size);
+ }
+
+ return ptr;
+}
+
+bool dma_free_from_pool(void *start, size_t size)
+{
+ if (!dma_in_atomic_pool(start, size))
+ return false;
+ gen_pool_free(atomic_pool, (unsigned long)start, size);
+ return true;
+}
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flags, unsigned long attrs)
+{
+ struct page *page = NULL;
+ void *ret, *kaddr;
+
+ size = PAGE_ALIGN(size);
+
+ if (!gfpflags_allow_blocking(flags)) {
+ ret = dma_alloc_from_pool(size, &page, flags);
+ if (!ret)
+ return NULL;
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ return ret;
+ }
+
+ kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
+ if (!kaddr)
+ return NULL;
+ page = virt_to_page(kaddr);
+
+ /* remove any dirty cache lines on the kernel alias */
+ arch_dma_prep_coherent(page, size);
+
+ /* create a coherent mapping */
+ ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
+ arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
+ __builtin_return_address(0));
+ if (!ret)
+ dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
+ return ret;
+}
+
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
+{
+ if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+ void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+ vunmap(vaddr);
+ dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
+ }
+}
+
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
+{
+ return __phys_to_pfn(dma_to_phys(dev, dma_addr));
+}
+#endif /* CONFIG_DMA_DIRECT_REMAP */
--
2.19.1
By using __dma_direct_alloc_pages we can deal entirely with struct page
instead of having to derive a kernel virtual address.
Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/dma/remap.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index bc42766f52df..8f1fca34b894 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -196,7 +196,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flags, unsigned long attrs)
{
struct page *page = NULL;
- void *ret, *kaddr;
+ void *ret;
size = PAGE_ALIGN(size);
@@ -208,10 +208,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
return ret;
}
- kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
- if (!kaddr)
+ page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
+ if (!page)
return NULL;
- page = virt_to_page(kaddr);
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
@@ -221,7 +220,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret)
- dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
+ __dma_direct_free_pages(dev, size, page);
return ret;
}
@@ -229,10 +228,11 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
- void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
+ phys_addr_t phys = dma_to_phys(dev, dma_handle);
+ struct page *page = pfn_to_page(__phys_to_pfn(phys));
vunmap(vaddr);
- dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
+ __dma_direct_free_pages(dev, size, page);
}
}
--
2.19.1
csky does not implement ZONE_DMA, which means passing GFP_DMA is a no-op.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/csky/mm/dma-mapping.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 85437b21e045..ad4046939713 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -35,7 +35,7 @@ static int __init atomic_pool_init(void)
if (!atomic_pool)
BUG();
- page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size));
+ page = alloc_pages(GFP_KERNEL, get_order(size));
if (!page)
BUG();
--
2.19.1
This option is gone past Linux 4.19.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/csky/Kconfig | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 8a30e006a845..c0cf8e948821 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -8,7 +8,6 @@ config CSKY
select CLKSRC_MMIO
select CLKSRC_OF
select DMA_DIRECT_OPS
- select DMA_NONCOHERENT_OPS
select DMA_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
--
2.19.1
The csky code was largely copied from arm/arm64, so switch to the
generic arm64-based implementation instead.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/csky/Kconfig | 2 +-
arch/csky/mm/dma-mapping.c | 142 +------------------------------------
2 files changed, 3 insertions(+), 141 deletions(-)
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index c0cf8e948821..ea74f3a9eeaf 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -8,7 +8,7 @@ config CSKY
select CLKSRC_MMIO
select CLKSRC_OF
select DMA_DIRECT_OPS
- select DMA_REMAP
+ select DMA_DIRECT_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index ad4046939713..80783bb71c5c 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,73 +14,13 @@
#include <linux/version.h>
#include <asm/cache.h>
-static struct gen_pool *atomic_pool;
-static size_t atomic_pool_size __initdata = SZ_256K;
-
-static int __init early_coherent_pool(char *p)
-{
- atomic_pool_size = memparse(p, &p);
- return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
static int __init atomic_pool_init(void)
{
- struct page *page;
- size_t size = atomic_pool_size;
- void *ptr;
- int ret;
-
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
- BUG();
-
- page = alloc_pages(GFP_KERNEL, get_order(size));
- if (!page)
- BUG();
-
- ptr = dma_common_contiguous_remap(page, size, VM_ALLOC,
- pgprot_noncached(PAGE_KERNEL),
- __builtin_return_address(0));
- if (!ptr)
- BUG();
-
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
- page_to_phys(page), atomic_pool_size, -1);
- if (ret)
- BUG();
-
- gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
-
- pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n",
- atomic_pool_size / 1024);
-
- pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr,
- page_to_phys(page));
-
- return 0;
+ return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
}
postcore_initcall(atomic_pool_init);
-static void *csky_dma_alloc_atomic(struct device *dev, size_t size,
- dma_addr_t *dma_handle)
-{
- unsigned long addr;
-
- addr = gen_pool_alloc(atomic_pool, size);
- if (addr)
- *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr);
-
- return (void *)addr;
-}
-
-static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- gen_pool_free(atomic_pool, (unsigned long)vaddr, size);
-}
-
-static void __dma_clear_buffer(struct page *page, size_t size)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
if (PageHighMem(page)) {
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size)
}
}
-static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
- void *vaddr;
- struct page *page;
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- if (DMA_ATTR_NON_CONSISTENT & attrs) {
- pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__);
- return NULL;
- }
-
- if (IS_ENABLED(CONFIG_DMA_CMA))
- page = dma_alloc_from_contiguous(dev, count, get_order(size),
- gfp);
- else
- page = alloc_pages(gfp, get_order(size));
-
- if (!page) {
- pr_err("csky %s no more free pages.\n", __func__);
- return NULL;
- }
-
- *dma_handle = page_to_phys(page);
-
- __dma_clear_buffer(page, size);
-
- if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
- return page;
-
- vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP,
- pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0));
- if (!vaddr)
- BUG();
-
- return vaddr;
-}
-
-static void csky_dma_free_nonatomic(
- struct device *dev,
- size_t size,
- void *vaddr,
- dma_addr_t dma_handle,
- unsigned long attrs
- )
-{
- struct page *page = phys_to_page(dma_handle);
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- if ((unsigned int)vaddr >= VMALLOC_START)
- dma_common_free_remap(vaddr, size, VM_USERMAP);
-
- if (IS_ENABLED(CONFIG_DMA_CMA))
- dma_release_from_contiguous(dev, page, count);
- else
- __free_pages(page, get_order(size));
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- if (gfpflags_allow_blocking(gfp))
- return csky_dma_alloc_nonatomic(dev, size, dma_handle, gfp,
- attrs);
- else
- return csky_dma_alloc_atomic(dev, size, dma_handle);
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- if (!addr_in_gen_pool(atomic_pool, (unsigned int) vaddr, size))
- csky_dma_free_nonatomic(dev, size, vaddr, dma_handle, attrs);
- else
- csky_dma_free_atomic(dev, size, vaddr, dma_handle, attrs);
-}
-
static inline void cache_op(phys_addr_t paddr, size_t size,
void (*fn)(unsigned long start, unsigned long end))
{
--
2.19.1
Do not waste vmalloc space on allocations that do not require a mapping
into the kernel address space.
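For illustration only, a hypothetical driver snippet showing the case this
patch optimizes (dev and size are assumed from the driver context): with
DMA_ATTR_NO_KERNEL_MAPPING the returned value is an opaque cookie (a
struct page pointer here), not a usable kernel address, so no vmalloc
mapping is needed:

	dma_addr_t dma;
	void *cookie;

	cookie = dma_alloc_attrs(dev, size, &dma, GFP_KERNEL,
			DMA_ATTR_NO_KERNEL_MAPPING);
	/*
	 * only 'dma' may be programmed into the device;
	 * 'cookie' must never be dereferenced by the CPU.
	 */
	dma_free_attrs(dev, size, cookie, dma, DMA_ATTR_NO_KERNEL_MAPPING);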
Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/dma/remap.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 8f1fca34b894..10a545126b0b 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -200,7 +200,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
size = PAGE_ALIGN(size);
- if (!gfpflags_allow_blocking(flags)) {
+ if (!gfpflags_allow_blocking(flags) &&
+ !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
ret = dma_alloc_from_pool(size, &page, flags);
if (!ret)
return NULL;
@@ -215,6 +216,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+ return page; /* opaque cookie */
+
/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
@@ -227,7 +231,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+ /* vaddr is a struct page cookie, not a kernel address */
+ __dma_direct_free_pages(dev, size, vaddr);
+ } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
phys_addr_t phys = dma_to_phys(dev, dma_handle);
struct page *page = pfn_to_page(__phys_to_pfn(phys));
--
2.19.1
On Mon, Nov 05, 2018 at 01:19:29PM +0100, Christoph Hellwig wrote:
> This option is gone past Linux 4.19.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/csky/Kconfig | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
> index 8a30e006a845..c0cf8e948821 100644
> --- a/arch/csky/Kconfig
> +++ b/arch/csky/Kconfig
> @@ -8,7 +8,6 @@ config CSKY
> select CLKSRC_MMIO
> select CLKSRC_OF
> select DMA_DIRECT_OPS
> - select DMA_NONCOHERENT_OPS
> select DMA_REMAP
> select IRQ_DOMAIN
> select HANDLE_DOMAIN_IRQ
> --
> 2.19.1
Acked-by: Guo Ren <[email protected]>
On Mon, Nov 05, 2018 at 01:19:30PM +0100, Christoph Hellwig wrote:
> csky does not implement ZONE_DMA, which means passing GFP_DMA is a no-op.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/csky/mm/dma-mapping.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
> index 85437b21e045..ad4046939713 100644
> --- a/arch/csky/mm/dma-mapping.c
> +++ b/arch/csky/mm/dma-mapping.c
> @@ -35,7 +35,7 @@ static int __init atomic_pool_init(void)
> if (!atomic_pool)
> BUG();
>
> - page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size));
> + page = alloc_pages(GFP_KERNEL, get_order(size));
> if (!page)
> BUG();
>
> --
> 2.19.1
Acked-by: Guo Ren <[email protected]>
On Mon, Nov 05, 2018 at 01:19:31PM +0100, Christoph Hellwig wrote:
> The csky code was largely copied from arm/arm64, so switch to the
> generic arm64-based implementation instead.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/csky/Kconfig | 2 +-
> arch/csky/mm/dma-mapping.c | 142 +------------------------------------
> 2 files changed, 3 insertions(+), 141 deletions(-)
>
> diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
> index c0cf8e948821..ea74f3a9eeaf 100644
> --- a/arch/csky/Kconfig
> +++ b/arch/csky/Kconfig
> @@ -8,7 +8,7 @@ config CSKY
> select CLKSRC_MMIO
> select CLKSRC_OF
> select DMA_DIRECT_OPS
> - select DMA_REMAP
> + select DMA_DIRECT_REMAP
> select IRQ_DOMAIN
> select HANDLE_DOMAIN_IRQ
> select DW_APB_TIMER_OF
> diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
> index ad4046939713..80783bb71c5c 100644
> --- a/arch/csky/mm/dma-mapping.c
> +++ b/arch/csky/mm/dma-mapping.c
> @@ -14,73 +14,13 @@
> #include <linux/version.h>
> #include <asm/cache.h>
>
> -static struct gen_pool *atomic_pool;
> -static size_t atomic_pool_size __initdata = SZ_256K;
> -
> -static int __init early_coherent_pool(char *p)
> -{
> - atomic_pool_size = memparse(p, &p);
> - return 0;
> -}
> -early_param("coherent_pool", early_coherent_pool);
> -
> static int __init atomic_pool_init(void)
> {
> - struct page *page;
> - size_t size = atomic_pool_size;
> - void *ptr;
> - int ret;
> -
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> - BUG();
> -
> - page = alloc_pages(GFP_KERNEL, get_order(size));
> - if (!page)
> - BUG();
> -
> - ptr = dma_common_contiguous_remap(page, size, VM_ALLOC,
> - pgprot_noncached(PAGE_KERNEL),
> - __builtin_return_address(0));
> - if (!ptr)
> - BUG();
> -
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
> - page_to_phys(page), atomic_pool_size, -1);
> - if (ret)
> - BUG();
> -
> - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> -
> - pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n",
> - atomic_pool_size / 1024);
> -
> - pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr,
> - page_to_phys(page));
> -
> - return 0;
> + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
> }
> postcore_initcall(atomic_pool_init);
It seems atomic_pool_init could also be removed from csky; why not put it in
common code?
>
> -static void *csky_dma_alloc_atomic(struct device *dev, size_t size,
> - dma_addr_t *dma_handle)
> -{
> - unsigned long addr;
> -
> - addr = gen_pool_alloc(atomic_pool, size);
> - if (addr)
> - *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr);
> -
> - return (void *)addr;
> -}
> -
> -static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr,
> - dma_addr_t dma_handle, unsigned long attrs)
> -{
> - gen_pool_free(atomic_pool, (unsigned long)vaddr, size);
> -}
> -
> -static void __dma_clear_buffer(struct page *page, size_t size)
> +void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> if (PageHighMem(page)) {
> unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> @@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size)
> }
> }
>
> -static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size,
> - dma_addr_t *dma_handle, gfp_t gfp,
> - unsigned long attrs)
> -{
> - void *vaddr;
> - struct page *page;
> - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> -
> - if (DMA_ATTR_NON_CONSISTENT & attrs) {
> - pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__);
> - return NULL;
> - }
> -
> - if (IS_ENABLED(CONFIG_DMA_CMA))
> - page = dma_alloc_from_contiguous(dev, count, get_order(size),
> - gfp);
> - else
> - page = alloc_pages(gfp, get_order(size));
> -
> - if (!page) {
> - pr_err("csky %s no more free pages.\n", __func__);
> - return NULL;
> - }
> -
> - *dma_handle = page_to_phys(page);
> -
> - __dma_clear_buffer(page, size);
> -
> - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
> - return page;
> -
> - vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP,
> - pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0));
> - if (!vaddr)
> - BUG();
> -
> - return vaddr;
> -}
> -
> -static void csky_dma_free_nonatomic(
> - struct device *dev,
> - size_t size,
> - void *vaddr,
> - dma_addr_t dma_handle,
> - unsigned long attrs
> - )
> -{
> - struct page *page = phys_to_page(dma_handle);
> - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> -
> - if ((unsigned int)vaddr >= VMALLOC_START)
> - dma_common_free_remap(vaddr, size, VM_USERMAP);
> -
> - if (IS_ENABLED(CONFIG_DMA_CMA))
> - dma_release_from_contiguous(dev, page, count);
> - else
> - __free_pages(page, get_order(size));
> -}
> -
> -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> - gfp_t gfp, unsigned long attrs)
> -{
> - if (gfpflags_allow_blocking(gfp))
> - return csky_dma_alloc_nonatomic(dev, size, dma_handle, gfp,
> - attrs);
> - else
> - return csky_dma_alloc_atomic(dev, size, dma_handle);
> -}
> -
> -void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> - dma_addr_t dma_handle, unsigned long attrs)
> -{
> - if (!addr_in_gen_pool(atomic_pool, (unsigned int) vaddr, size))
> - csky_dma_free_nonatomic(dev, size, vaddr, dma_handle, attrs);
> - else
> - csky_dma_free_atomic(dev, size, vaddr, dma_handle, attrs);
> -}
> -
> static inline void cache_op(phys_addr_t paddr, size_t size,
> void (*fn)(unsigned long start, unsigned long end))
> {
> --
> 2.19.1
Reviewed-by: Guo Ren <[email protected]>
Compile is OK, qemu boot is OK. The functions are the same, just moved to common code.
Looks good to me.
Guo Ren
On Tue, Nov 06, 2018 at 03:01:41PM +0800, Guo Ren wrote:
> > + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
> > }
> > postcore_initcall(atomic_pool_init);
> It seems atomic_pool_init could also be removed from csky; why not put it in
> common code?
The code basically moved to common code, but the architecture needs
to pick the gfp mask (GFP_DMA32 on arm64 vs GFP_KERNEL on csky, for example)
and the pgprot it needs for uncached remappings.
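Concretely, the two call sites in this series end up as (copied from the
arm64 and csky patches earlier in the thread):

	/* arm64 */
	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));

	/* csky */
	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));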
> Reviewed-by: Guo Ren <[email protected]>
>
> Compile is OK, qemu boot is OK. The functions are the same, just moved to common code.
Thanks for your review!
Can I get a quick review from the arm64 folks? I think it should
be fine there as it basically is a code move, but an additional pair
or two of eyes always helps to weed out bugs.
On Mon, Nov 05, 2018 at 01:19:22PM +0100, Christoph Hellwig wrote:
> Hi all,
>
> this series moves the existing arm64 implementation of arch_dma_alloc and
> arch_dma_free to common code given that it is not arm64-specific, and
> then also uses it for csky. Given how many architectures remap memory
> for the DMA coherent implementation it should be usable for many more,
> and the new cache flushing hook and the generic atomic pool are also
> enablers for implementing the IOMMU API dma ops in common code in a
> follow-on series.
> _______________________________________________
> iommu mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
---end quoted text---
Hi Christoph,
Minor nit: typo in the subject "ncoherent".
On Mon, Nov 05, 2018 at 01:19:26PM +0100, Christoph Hellwig wrote:
> The arm64 codebase to implement coherent dma allocation for architectures
> with non-coherent DMA is a good start for a generic implementation, given
> that it uses the generic remap helpers, provides the atomic pool for
> allocations that can't sleep and still is relatively simple and well
> tested. Move it to kernel/dma and allow architectures to opt into it
> using a config symbol. Architectures just need to provide a new
> arch_dma_prep_coherent helper to write back and invalidate the caches
> for any memory that gets remapped for uncached access.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> arch/arm64/mm/dma-mapping.c | 184 ++------------------------------
> include/linux/dma-mapping.h | 5 +
> include/linux/dma-noncoherent.h | 2 +
> kernel/dma/Kconfig | 6 ++
> kernel/dma/remap.c | 158 ++++++++++++++++++++++++++-
> 6 files changed, 181 insertions(+), 176 deletions(-)
I'm currently at LPC, so I've not been able to test this, but I've been
through the changes this morning and they look fine to me, so:
Reviewed-by: Will Deacon <[email protected]>
Hopefully we'll get the fallout from the previous changes addressed next
week.
Cheers,
Will
On Fri, Nov 09, 2018 at 08:52:38AM +0100, Christoph Hellwig wrote:
> Can I get a quick review from the arm64 folks? I think it should
> be fine there as it basically is a code move, but an additional pair
> or two of eyes always helps to weed out bugs.
I reviewed the arm64 parts, but it would be ideal if Robin could have a look
as well.
Will
On 2018-11-15 11:50 am, Will Deacon wrote:
> On Fri, Nov 09, 2018 at 08:52:38AM +0100, Christoph Hellwig wrote:
>> Can I get a quick review from the arm64 folks? I think it should
>> be fine there as it basically is a code move, but an additional pair
>> or two of eyes always helps to weed out bugs.
>
> I reviewed the arm64 parts, but it would be ideal if Robin could have a look
> as well.
Yup, from a quick skim the general shape of the whole series looks
pleasing, but I've been holding off going through it in detail until
I've figured out what's up with the last thing I thought I'd reviewed
exhaustively...
Either way I'll make some time for a proper look next week once I'm back.
Robin.
On Thu, Nov 15, 2018 at 12:58:04PM -0800, Robin Murphy wrote:
> On 2018-11-15 11:50 am, Will Deacon wrote:
>> On Fri, Nov 09, 2018 at 08:52:38AM +0100, Christoph Hellwig wrote:
>>> Can I get a quick review from the arm64 folks? I think it should
>>> be fine there as it basically is a code move, but an additional pair
>>> or two of eyes always helps to weed out bugs.
>>
>> I reviewed the arm64 parts, but it would be ideal if Robin could have a look
>> as well.
>
> Yup, from a quick skim the general shape of the whole series looks
> pleasing, but I've been holding off going through it in detail until I've
> figured out what's up with the last thing I thought I'd reviewed
> exhaustively...
>
> Either way I'll make some time for a proper look next week once I'm back.
Did you get a chance to look over this?
On 27/11/2018 07:37, Christoph Hellwig wrote:
> On Thu, Nov 15, 2018 at 12:58:04PM -0800, Robin Murphy wrote:
>> On 2018-11-15 11:50 am, Will Deacon wrote:
>>> On Fri, Nov 09, 2018 at 08:52:38AM +0100, Christoph Hellwig wrote:
>>>> Can I get a quick review from the arm64 folks? I think it should
>>>> be fine there as it basically is a code move, but an additional pair
>>>> or two of eyes always helps to weed out bugs.
>>>
>>> I reviewed the arm64 parts, but it would be ideal if Robin could have a look
>>> as well.
>>
>> Yup, from a quick skim the general shape of the whole series looks
>> pleasing, but I've been holding off going through it in detail until I've
>> figured out what's up with the last thing I thought I'd reviewed
>> exhaustively...
>>
>> Either way I'll make some time for a proper look next week once I'm back.
>
> Did you get a chance to look over this?
Sorry, it took a little longer than expected to catch up, but I'm
looking at it now.
Robin.
On 05/11/2018 12:19, Christoph Hellwig wrote:
> dma_alloc_from_contiguous can return highmem pages depending on the
> setup, which a plain non-remapping DMA allocator can't handle. Detect
> this case and try the normal page allocator instead.
...except the actual implementation is "Detect this case and fail the
entire allocation if so".
Again, the diff itself makes sense, so given an accurate commit message,
Reviewed-by: Robin Murphy <[email protected]>
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> kernel/dma/direct.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 680287779b0a..c49849bcced6 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -162,6 +162,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> if (!page)
> return NULL;
>
> + if (PageHighMem(page)) {
> + /*
> + * Depending on the cma= arguments and per-arch setup
> + * dma_alloc_from_contiguous could return highmem pages.
> + * Without remapping there is no way to return them here,
> + * so log an error and fail.
> + */
> + dev_info(dev, "Rejecting highmem page from CMA.\n");
> + __dma_direct_free_pages(dev, size, page);
> + return NULL;
> + }
> +
> ret = page_address(page);
> if (force_dma_unencrypted()) {
> set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
>
On 05/11/2018 12:19, Christoph Hellwig wrote:
> The dma remap code only really makes sense for non-cache-coherent
> architectures,
And coherent ones with highmem, presumably? That can at least be the
case on 32-bit Arm, where coherent LPAE systems do exist (e.g. Calxeda
Midway).
> and currently is only used by arm, arm64 and xtensa.
> Split it out into a separate file with a separate Kconfig symbol,
> which gets the right copyright notice given that this code was
> written by Laura Abbott working for Code Aurora at that point.
Ignoring the further super-nitpick that the comments got subtle grammar
fixes in some places but not others,
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Laura Abbott <[email protected]>
> ---
> arch/arm/Kconfig | 1 +
> arch/arm64/Kconfig | 1 +
> arch/csky/Kconfig | 1 +
> arch/xtensa/Kconfig | 1 +
> kernel/dma/Kconfig | 4 ++
> kernel/dma/Makefile | 2 +-
> kernel/dma/mapping.c | 84 ------------------------------------------
> kernel/dma/remap.c | 88 ++++++++++++++++++++++++++++++++++++++++++++
> 8 files changed, 97 insertions(+), 85 deletions(-)
> create mode 100644 kernel/dma/remap.c
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 91be74d8df65..3b2852df6eb3 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -30,6 +30,7 @@ config ARM
> select CPU_PM if (SUSPEND || CPU_IDLE)
> select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
> select DMA_DIRECT_OPS if !MMU
> + select DMA_REMAP if MMU
> select EDAC_SUPPORT
> select EDAC_ATOMIC_SCRUB
> select GENERIC_ALLOCATOR
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 787d7850e064..5d065acb6d10 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -82,6 +82,7 @@ config ARM64
> select CRC32
> select DCACHE_WORD_ACCESS
> select DMA_DIRECT_OPS
> + select DMA_REMAP
> select EDAC_SUPPORT
> select FRAME_POINTER
> select GENERIC_ALLOCATOR
> diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
> index cb64f8dacd08..8a30e006a845 100644
> --- a/arch/csky/Kconfig
> +++ b/arch/csky/Kconfig
> @@ -9,6 +9,7 @@ config CSKY
> select CLKSRC_OF
> select DMA_DIRECT_OPS
> select DMA_NONCOHERENT_OPS
> + select DMA_REMAP
> select IRQ_DOMAIN
> select HANDLE_DOMAIN_IRQ
> select DW_APB_TIMER_OF
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index d29b7365da8d..239bfb16c58b 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -11,6 +11,7 @@ config XTENSA
> select CLONE_BACKWARDS
> select COMMON_CLK
> select DMA_DIRECT_OPS
> + select DMA_REMAP if MMU
> select GENERIC_ATOMIC64
> select GENERIC_CLOCKEVENTS
> select GENERIC_IRQ_SHOW
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index 645c7a2ecde8..c92e08173ed8 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -51,3 +51,7 @@ config SWIOTLB
> bool
> select DMA_DIRECT_OPS
> select NEED_DMA_MAP_STATE
> +
> +config DMA_REMAP
> + depends on MMU
> + bool
> diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
> index 7d581e4eea4a..f4feeceb8020 100644
> --- a/kernel/dma/Makefile
> +++ b/kernel/dma/Makefile
> @@ -7,4 +7,4 @@ obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o
> obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
> obj-$(CONFIG_DMA_API_DEBUG) += debug.o
> obj-$(CONFIG_SWIOTLB) += swiotlb.o
> -
> +obj-$(CONFIG_DMA_REMAP) += remap.o
> diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
> index 58dec7a92b7b..dfbc3deb95cd 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -262,87 +262,3 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
> #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
> }
> EXPORT_SYMBOL(dma_common_mmap);
> -
> -#ifdef CONFIG_MMU
> -static struct vm_struct *__dma_common_pages_remap(struct page **pages,
> - size_t size, unsigned long vm_flags, pgprot_t prot,
> - const void *caller)
> -{
> - struct vm_struct *area;
> -
> - area = get_vm_area_caller(size, vm_flags, caller);
> - if (!area)
> - return NULL;
> -
> - if (map_vm_area(area, prot, pages)) {
> - vunmap(area->addr);
> - return NULL;
> - }
> -
> - return area;
> -}
> -
> -/*
> - * remaps an array of PAGE_SIZE pages into another vm_area
> - * Cannot be used in non-sleeping contexts
> - */
> -void *dma_common_pages_remap(struct page **pages, size_t size,
> - unsigned long vm_flags, pgprot_t prot,
> - const void *caller)
> -{
> - struct vm_struct *area;
> -
> - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
> - if (!area)
> - return NULL;
> -
> - area->pages = pages;
> -
> - return area->addr;
> -}
> -
> -/*
> - * remaps an allocated contiguous region into another vm_area.
> - * Cannot be used in non-sleeping contexts
> - */
> -
> -void *dma_common_contiguous_remap(struct page *page, size_t size,
> - unsigned long vm_flags,
> - pgprot_t prot, const void *caller)
> -{
> - int i;
> - struct page **pages;
> - struct vm_struct *area;
> -
> - pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
> - if (!pages)
> - return NULL;
> -
> - for (i = 0; i < (size >> PAGE_SHIFT); i++)
> - pages[i] = nth_page(page, i);
> -
> - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
> -
> - kfree(pages);
> -
> - if (!area)
> - return NULL;
> - return area->addr;
> -}
> -
> -/*
> - * unmaps a range previously mapped by dma_common_*_remap
> - */
> -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
> -{
> - struct vm_struct *area = find_vm_area(cpu_addr);
> -
> - if (!area || (area->flags & vm_flags) != vm_flags) {
> - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
> - return;
> - }
> -
> - unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
> - vunmap(cpu_addr);
> -}
> -#endif
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> new file mode 100644
> index 000000000000..456f7cc3414d
> --- /dev/null
> +++ b/kernel/dma/remap.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2014 The Linux Foundation
> + */
> +#include <linux/dma-mapping.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +
> +static struct vm_struct *__dma_common_pages_remap(struct page **pages,
> + size_t size, unsigned long vm_flags, pgprot_t prot,
> + const void *caller)
> +{
> + struct vm_struct *area;
> +
> + area = get_vm_area_caller(size, vm_flags, caller);
> + if (!area)
> + return NULL;
> +
> + if (map_vm_area(area, prot, pages)) {
> + vunmap(area->addr);
> + return NULL;
> + }
> +
> + return area;
> +}
> +
> +/*
> + * remaps an array of PAGE_SIZE pages into another vm_area
> + * Cannot be used in non-sleeping contexts
> + */
> +void *dma_common_pages_remap(struct page **pages, size_t size,
> + unsigned long vm_flags, pgprot_t prot,
> + const void *caller)
> +{
> + struct vm_struct *area;
> +
> + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
> + if (!area)
> + return NULL;
> +
> + area->pages = pages;
> +
> + return area->addr;
> +}
> +
> +/*
> + * Remaps an allocated contiguous region into another vm_area.
> + * Cannot be used in non-sleeping contexts
> + */
> +void *dma_common_contiguous_remap(struct page *page, size_t size,
> + unsigned long vm_flags,
> + pgprot_t prot, const void *caller)
> +{
> + int i;
> + struct page **pages;
> + struct vm_struct *area;
> +
> + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
> + if (!pages)
> + return NULL;
> +
> + for (i = 0; i < (size >> PAGE_SHIFT); i++)
> + pages[i] = nth_page(page, i);
> +
> + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
> +
> + kfree(pages);
> +
> + if (!area)
> + return NULL;
> + return area->addr;
> +}
> +
> +/*
> + * Unmaps a range previously mapped by dma_common_*_remap
> + */
> +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
> +{
> + struct vm_struct *area = find_vm_area(cpu_addr);
> +
> + if (!area || (area->flags & vm_flags) != vm_flags) {
> + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
> + return;
> + }
> +
> + unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
> + vunmap(cpu_addr);
> +}
>
On 05/11/2018 12:19, Christoph Hellwig wrote:
> Some architectures support remapping highmem into DMA coherent
> allocations. To use the common code for them we need variants of
> dma_direct_{alloc,free}_pages that do not use kernel virtual addresses.
FWIW it's as much about non-cacheable remapping of lowmem as it is about
highmem. Regardless, the diff looks OK to me.
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> include/linux/dma-direct.h | 3 +++
> kernel/dma/direct.c | 32 ++++++++++++++++++++++----------
> 2 files changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
> index bd73e7a91410..5a7a3bbb912f 100644
> --- a/include/linux/dma-direct.h
> +++ b/include/linux/dma-direct.h
> @@ -67,6 +67,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
> void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> dma_addr_t dma_addr, unsigned long attrs);
> +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
> + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
> +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page);
> dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
> unsigned long offset, size_t size, enum dma_data_direction dir,
> unsigned long attrs);
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 22a12ab5a5e9..680287779b0a 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -103,14 +103,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
> min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
> }
>
> -void *dma_direct_alloc_pages(struct device *dev, size_t size,
> +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
> dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
> {
> unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> int page_order = get_order(size);
> struct page *page = NULL;
> u64 phys_mask;
> - void *ret;
>
> if (attrs & DMA_ATTR_NO_WARN)
> gfp |= __GFP_NOWARN;
> @@ -150,11 +149,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> }
> }
>
> + return page;
> +}
> +
> +void *dma_direct_alloc_pages(struct device *dev, size_t size,
> + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
> +{
> + struct page *page;
> + void *ret;
> +
> + page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
> if (!page)
> return NULL;
> +
> ret = page_address(page);
> if (force_dma_unencrypted()) {
> - set_memory_decrypted((unsigned long)ret, 1 << page_order);
> + set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
> *dma_handle = __phys_to_dma(dev, page_to_phys(page));
> } else {
> *dma_handle = phys_to_dma(dev, page_to_phys(page));
> @@ -163,20 +173,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
> return ret;
> }
>
> -/*
> - * NOTE: this function must never look at the dma_addr argument, because we want
> - * to be able to use it as a helper for iommu implementations as well.
> - */
> +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
> +{
> + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +
> + if (!dma_release_from_contiguous(dev, page, count))
> + __free_pages(page, get_order(size));
> +}
> +
> void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
> dma_addr_t dma_addr, unsigned long attrs)
> {
> - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> unsigned int page_order = get_order(size);
>
> if (force_dma_unencrypted())
> set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
> - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
> - free_pages((unsigned long)cpu_addr, page_order);
> + __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
> }
>
> void *dma_direct_alloc(struct device *dev, size_t size,
>
On 05/11/2018 12:19, Christoph Hellwig wrote:
> The arm64 codebase to implement coherent dma allocation for architectures
> with non-coherent DMA is a good start for a generic implementation, given
> that is uses the generic remap helpers, provides the atomic pool for
> allocations that can't sleep and still is realtively simple and well
> tested. Move it to kernel/dma and allow architectures to opt into it
> using a config symbol. Architectures just need to provide a new
> arch_dma_prep_coherent helper to writeback an invalidate the caches
> for any memory that gets remapped for uncached access.
It's a bit yuck that we now end up with arch_* hooks being a mix of arch
code and not-actually-arch-code, but I guess there's some hope of coming
back and streamlining things in future once all the big moves are done.
I can't really be bothered to nitpick the typos above and the slight
inconsistencies in some of the cosmetic code changes, but one worthwhile
thing stands out...
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> arch/arm64/mm/dma-mapping.c | 184 ++------------------------------
> include/linux/dma-mapping.h | 5 +
> include/linux/dma-noncoherent.h | 2 +
> kernel/dma/Kconfig | 6 ++
> kernel/dma/remap.c | 158 ++++++++++++++++++++++++++-
> 6 files changed, 181 insertions(+), 176 deletions(-)
[...]
> +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> +{
> + unsigned long val;
> + void *ptr = NULL;
> +
> + if (!atomic_pool) {
> + WARN(1, "coherent pool not initialised!\n");
> + return NULL;
> + }
> +
> + val = gen_pool_alloc(atomic_pool, size);
> + if (val) {
> + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> +
> + *ret_page = phys_to_page(phys);
Looks like phys_to_page() isn't particularly portable, so we probably
want an explicit pfn_to_page(__phys_to_pfn(phys)) here. Otherwise, the
fundamental refactoring looks OK.
Reviewed-by: Robin Murphy <[email protected]>
[ In fact, looking at phys_to_page(), microblaze, riscv, unicore and
csky (by the end of this series if it's fixed here) don't need to define
it at all; s390 defines it for the sake of a single call in a single
driver; it's used in two other places in arm-related drivers but at
least one of those is clearly wrong. All in all it's quite the sorry mess. ]
> + ptr = (void *)val;
> + memset(ptr, 0, size);
> + }
> +
> + return ptr;
> +}
> +
> +bool dma_free_from_pool(void *start, size_t size)
> +{
> + if (!dma_in_atomic_pool(start, size))
> + return false;
> + gen_pool_free(atomic_pool, (unsigned long)start, size);
> + return true;
> +}
> +
> +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> + gfp_t flags, unsigned long attrs)
> +{
> + struct page *page = NULL;
> + void *ret, *kaddr;
> +
> + size = PAGE_ALIGN(size);
> +
> + if (!gfpflags_allow_blocking(flags)) {
> + ret = dma_alloc_from_pool(size, &page, flags);
> + if (!ret)
> + return NULL;
> + *dma_handle = phys_to_dma(dev, page_to_phys(page));
> + return ret;
> + }
> +
> + kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> + if (!kaddr)
> + return NULL;
> + page = virt_to_page(kaddr);
> +
> + /* remove any dirty cache lines on the kernel alias */
> + arch_dma_prep_coherent(page, size);
> +
> + /* create a coherent mapping */
> + ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
> + arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
> + __builtin_return_address(0));
> + if (!ret)
> + dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
> + return ret;
> +}
> +
> +void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> + dma_addr_t dma_handle, unsigned long attrs)
> +{
> + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> +
> + vunmap(vaddr);
> + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> + }
> +}
> +
> +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
> + dma_addr_t dma_addr)
> +{
> + return __phys_to_pfn(dma_to_phys(dev, dma_addr));
> +}
> +#endif /* CONFIG_DMA_DIRECT_REMAP */
>
On 05/11/2018 12:19, Christoph Hellwig wrote:
> By using __dma_direct_alloc_pages we can deal entirely with struct page
> instead of having to derive a kernel virtual address.
Simple enough :)
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> kernel/dma/remap.c | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> index bc42766f52df..8f1fca34b894 100644
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -196,7 +196,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> gfp_t flags, unsigned long attrs)
> {
> struct page *page = NULL;
> - void *ret, *kaddr;
> + void *ret;
>
> size = PAGE_ALIGN(size);
>
> @@ -208,10 +208,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> return ret;
> }
>
> - kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> - if (!kaddr)
> + page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> + if (!page)
> return NULL;
> - page = virt_to_page(kaddr);
>
> /* remove any dirty cache lines on the kernel alias */
> arch_dma_prep_coherent(page, size);
> @@ -221,7 +220,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
> __builtin_return_address(0));
> if (!ret)
> - dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
> + __dma_direct_free_pages(dev, size, page);
> return ret;
> }
>
> @@ -229,10 +228,11 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_handle, unsigned long attrs)
> {
> if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> + phys_addr_t phys = dma_to_phys(dev, dma_handle);
> + struct page *page = pfn_to_page(__phys_to_pfn(phys));
>
> vunmap(vaddr);
> - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> + __dma_direct_free_pages(dev, size, page);
> }
> }
>
>
On 05/11/2018 12:19, Christoph Hellwig wrote:
> Do not waste vmalloc space on allocations that do not require a mapping
> into the kernel address space.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> kernel/dma/remap.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> index 8f1fca34b894..10a545126b0b 100644
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -200,7 +200,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>
> size = PAGE_ALIGN(size);
>
> - if (!gfpflags_allow_blocking(flags)) {
> + if (!gfpflags_allow_blocking(flags) &&
> + !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
> ret = dma_alloc_from_pool(size, &page, flags);
> if (!ret)
> return NULL;
> @@ -215,6 +216,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> /* remove any dirty cache lines on the kernel alias */
> arch_dma_prep_coherent(page, size);
>
> + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
> + return page; /* opaqueue cookie */
Best to preempt the inevitable patch fixing that typo in 3 months' time.
Otherwise,
Reviewed-by: Robin Murphy <[email protected]>
> +
> /* create a coherent mapping */
> ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
> arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
> @@ -227,7 +231,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_handle, unsigned long attrs)
> {
> - if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
> + /* vaddr is a struct page cookie, not a kernel address */
> + __dma_direct_free_pages(dev, size, vaddr);
> + } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> phys_addr_t phys = dma_to_phys(dev, dma_handle);
> struct page *page = pfn_to_page(__phys_to_pfn(phys));
>
>
On Fri, Nov 30, 2018 at 07:04:41PM +0000, Robin Murphy wrote:
> On 05/11/2018 12:19, Christoph Hellwig wrote:
>> Some architectures support remapping highmem into DMA coherent
>> allocations. To use the common code for them we need variants of
>> dma_direct_{alloc,free}_pages that do not use kernel virtual addresses.
>
> FWIW it's as much about non-cacheable remapping of lowmem as it is about
> highmem. Regardless, the diff looks OK to me.
Yes, but as long as you only remap lowmem the current interface works
ok; once you have highmem a kernel virtual address doesn't cut it,
and we need a struct page or physical address.
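To illustrate the distinction, here is a purely hypothetical sketch (not
part of the series; example_vaddr_for() is a made-up name): a lowmem page
can be used through its existing linear-map address, while a highmem page
has no permanent kernel mapping, so the allocator has to work from the
struct page and create a mapping explicitly when one is needed.

#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

/*
 * Illustration only -- not from the series.  page_address() is only
 * meaningful for lowmem; for a highmem page an explicit mapping has to
 * be created, which is why the new helpers pass struct page around.
 */
static void *example_vaddr_for(struct page *page, size_t size, pgprot_t prot)
{
	/* lowmem: the linear mapping is already there */
	if (!PageHighMem(page))
		return page_address(page);

	/* highmem: no linear mapping exists, create one */
	return dma_common_contiguous_remap(page, size, VM_USERMAP, prot,
					   __builtin_return_address(0));
}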
On Fri, Nov 30, 2018 at 07:04:51PM +0000, Robin Murphy wrote:
> On 05/11/2018 12:19, Christoph Hellwig wrote:
>> dma_alloc_from_contiguous can return highmem pages depending on the
>> setup, which a plain non-remapping DMA allocator can't handle. Detect
>> this case and try the normal page allocator instead.
>
> ...except the actual implementation is "Detect this case and fail the
> entire allocation if so".
True, that actually changed from when I wrote the commit log due to
some layering issues. I'll update the commit log.
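For reference, the detection in question would take roughly this shape in
dma_direct_alloc() -- a sketch based on the discussion above, not
necessarily the exact hunk from the series:

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= argument and per-arch setup
		 * dma_alloc_from_contiguous() can return highmem pages.
		 * Without remapping there is no usable kernel address for
		 * them, so fail the whole allocation instead.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}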
On Fri, Nov 30, 2018 at 07:05:06PM +0000, Robin Murphy wrote:
> On 05/11/2018 12:19, Christoph Hellwig wrote:
>> The dma remap code only really makes sense for not cache coherent
>> architectures,
>
> And coherent ones with highmem, presumably? That can at least be the case
> on 32-bit Arm, where coherent LPAE systems do exist (e.g. Calxeda Midway).
Ok, I've updated the commit log.
On Fri, Nov 30, 2018 at 07:05:23PM +0000, Robin Murphy wrote:
> It's a bit yuck that we now end up with arch_* hooks being a mix of arch
> code and not-actually-arch-code, but I guess there's some hope of coming
> back and streamlining things in future once all the big moves are done.
Yes, I hope we can use some form of common code here for most
architectures eventually. But that will take some time.
> I can't really be bothered to nitpick the typos above and the slight
> inconsistencies in some of the cosmetic code changes, but one worthwhile
> thing stands out...
I'm usually fine with picking up nitpicks. For now I'll apply the series
with the fixups pointed out here, but if you want to send patches for the
remaining nitpicks I'd be glad to take them.
>> + val = gen_pool_alloc(atomic_pool, size);
>> + if (val) {
>> + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
>> +
>> + *ret_page = phys_to_page(phys);
>
> Looks like phys_to_page() isn't particularly portable, so we probably want
> an explicit pfn_to_page(__phys_to_pfn(phys)) here. Otherwise, the
> fundamental refactoring looks OK.
Ok, I'll update it.
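A minimal sketch of what the fixed-up hunk in dma_alloc_from_pool() would
then look like, assuming the suggested pfn_to_page(__phys_to_pfn())
conversion is used as-is:

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		/* portable conversion instead of phys_to_page() */
		*ret_page = pfn_to_page(__phys_to_pfn(phys));
		ptr = (void *)val;
		memset(ptr, 0, size);
	}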
On Fri, Nov 30, 2018 at 07:05:40PM +0000, Robin Murphy wrote:
> On 05/11/2018 12:19, Christoph Hellwig wrote:
>> Do not waste vmalloc space on allocations that do not require a mapping
>> into the kernel address space.
>>
>> Signed-off-by: Christoph Hellwig <[email protected]>
>> ---
>> kernel/dma/remap.c | 11 +++++++++--
>> 1 file changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
>> index 8f1fca34b894..10a545126b0b 100644
>> --- a/kernel/dma/remap.c
>> +++ b/kernel/dma/remap.c
>> @@ -200,7 +200,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>> size = PAGE_ALIGN(size);
>> - if (!gfpflags_allow_blocking(flags)) {
>> + if (!gfpflags_allow_blocking(flags) &&
>> + !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
>> ret = dma_alloc_from_pool(size, &page, flags);
>> if (!ret)
>> return NULL;
>> @@ -215,6 +216,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>> /* remove any dirty cache lines on the kernel alias */
>> arch_dma_prep_coherent(page, size);
>> + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
>> + return page; /* opaqueue cookie */
>
> Best to preempt the inevitable patch fixing that typo in 3 months' time.
> Otherwise,
Thanks,
fixed.
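For context on the attribute being discussed, a hedged sketch of the
driver-side usage: with DMA_ATTR_NO_KERNEL_MAPPING the returned "address"
is an opaque cookie that the CPU never dereferences and that is only
passed back to the DMA API (the helper name and buffer size below are
made up for illustration):

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

/*
 * Hypothetical usage sketch -- not from the series.  The cookie returned
 * by dma_alloc_attrs() must only be handed back to dma_free_attrs() or
 * dma_mmap_attrs(); there is no kernel mapping behind it.
 */
static int example_alloc_device_only_buffer(struct device *dev,
					     dma_addr_t *dma, void **cookie)
{
	*cookie = dma_alloc_attrs(dev, SZ_1M, dma, GFP_KERNEL,
				  DMA_ATTR_NO_KERNEL_MAPPING);
	return *cookie ? 0 : -ENOMEM;
}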
Hi All,
On 2018-11-30 20:05, Robin Murphy wrote:
> On 05/11/2018 12:19, Christoph Hellwig wrote:
>> By using __dma_direct_alloc_pages we can deal entirely with struct page
>> instead of having to derive a kernel virtual address.
>
> Simple enough :)
>
> Reviewed-by: Robin Murphy <[email protected]>
This patch landed in linux-next yesterday and I've noticed that it
breaks the operation of many drivers. The change looked simple, but a
stupid bug managed to slip into the code. After a short investigation I
noticed that __dma_direct_alloc_pages() doesn't set dma_handle and
doesn't zero the allocated memory, while dma_direct_alloc_pages() did
both. The other difference is the lack of set_memory_decrypted() handling.
The following patch fixes the issue, but maybe it would be better to fix
it in kernel/dma/direct.c:
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index dcc82dd668f8..7765ddc56e4e 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -219,8 +219,14 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
- if (!ret)
+ if (!ret) {
__dma_direct_free_pages(dev, size, page);
+ return ret;
+ }
+
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ memset(ret, 0, size);
+
return ret;
}
>
>> Signed-off-by: Christoph Hellwig <[email protected]>
>> ---
>> kernel/dma/remap.c | 14 +++++++-------
>> 1 file changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
>> index bc42766f52df..8f1fca34b894 100644
>> --- a/kernel/dma/remap.c
>> +++ b/kernel/dma/remap.c
>> @@ -196,7 +196,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>> gfp_t flags, unsigned long attrs)
>> {
>> struct page *page = NULL;
>> - void *ret, *kaddr;
>> + void *ret;
>> size = PAGE_ALIGN(size);
>> @@ -208,10 +208,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>> return ret;
>> }
>> -	kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
>> - if (!kaddr)
>> +	page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
>> + if (!page)
>> return NULL;
>> - page = virt_to_page(kaddr);
>> /* remove any dirty cache lines on the kernel alias */
>> arch_dma_prep_coherent(page, size);
>> @@ -221,7 +220,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>> arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
>> __builtin_return_address(0));
>> if (!ret)
>> - dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
>> + __dma_direct_free_pages(dev, size, page);
>> return ret;
>> }
>> @@ -229,10 +228,11 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
>> dma_addr_t dma_handle, unsigned long attrs)
>> {
>> if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
>> - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
>> + phys_addr_t phys = dma_to_phys(dev, dma_handle);
>> + struct page *page = pfn_to_page(__phys_to_pfn(phys));
>> vunmap(vaddr);
>> - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
>> + __dma_direct_free_pages(dev, size, page);
>> }
>> }
>>
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Mon, Nov 05, 2018 at 01:19:26PM +0100, Christoph Hellwig wrote:
> The arm64 codebase to implement coherent dma allocation for architectures
> with non-coherent DMA is a good start for a generic implementation, given
> that is uses the generic remap helpers, provides the atomic pool for
> allocations that can't sleep and still is realtively simple and well
> tested. Move it to kernel/dma and allow architectures to opt into it
> using a config symbol. Architectures just need to provide a new
> arch_dma_prep_coherent helper to writeback an invalidate the caches
> for any memory that gets remapped for uncached access.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> arch/arm64/mm/dma-mapping.c | 184 ++------------------------------
> include/linux/dma-mapping.h | 5 +
> include/linux/dma-noncoherent.h | 2 +
> kernel/dma/Kconfig | 6 ++
> kernel/dma/remap.c | 158 ++++++++++++++++++++++++++-
> 6 files changed, 181 insertions(+), 176 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d065acb6d10..2e645ea693ea 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -82,7 +82,7 @@ config ARM64
> select CRC32
> select DCACHE_WORD_ACCESS
> select DMA_DIRECT_OPS
> - select DMA_REMAP
> + select DMA_DIRECT_REMAP
> select EDAC_SUPPORT
> select FRAME_POINTER
> select GENERIC_ALLOCATOR
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index a3ac26284845..e2e7e5d0f94e 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -33,113 +33,6 @@
>
> #include <asm/cacheflush.h>
>
> -static struct gen_pool *atomic_pool __ro_after_init;
> -
> -#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> -static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> -
> -static int __init early_coherent_pool(char *p)
> -{
> - atomic_pool_size = memparse(p, &p);
> - return 0;
> -}
> -early_param("coherent_pool", early_coherent_pool);
> -
> -static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> -{
> - unsigned long val;
> - void *ptr = NULL;
> -
> - if (!atomic_pool) {
> - WARN(1, "coherent pool not initialised!\n");
> - return NULL;
> - }
> -
> - val = gen_pool_alloc(atomic_pool, size);
> - if (val) {
> - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> -
> - *ret_page = phys_to_page(phys);
> - ptr = (void *)val;
> - memset(ptr, 0, size);
> - }
> -
> - return ptr;
> -}
> -
> -static bool __in_atomic_pool(void *start, size_t size)
> -{
> - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> -}
> -
> -static int __free_from_pool(void *start, size_t size)
> -{
> - if (!__in_atomic_pool(start, size))
> - return 0;
> -
> - gen_pool_free(atomic_pool, (unsigned long)start, size);
> -
> - return 1;
> -}
> -
> -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> - gfp_t flags, unsigned long attrs)
> -{
> - struct page *page;
> - void *ptr, *coherent_ptr;
> - pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
> -
> - size = PAGE_ALIGN(size);
> -
> - if (!gfpflags_allow_blocking(flags)) {
> - struct page *page = NULL;
> - void *addr = __alloc_from_pool(size, &page, flags);
> -
> - if (addr)
> - *dma_handle = phys_to_dma(dev, page_to_phys(page));
> -
> - return addr;
> - }
> -
> - ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> - if (!ptr)
> - goto no_mem;
> -
> - /* remove any dirty cache lines on the kernel alias */
> - __dma_flush_area(ptr, size);
> -
> - /* create a coherent mapping */
> - page = virt_to_page(ptr);
> - coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
> - prot, __builtin_return_address(0));
> - if (!coherent_ptr)
> - goto no_map;
> -
> - return coherent_ptr;
> -
> -no_map:
> - dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
> -no_mem:
> - return NULL;
> -}
> -
> -void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> - dma_addr_t dma_handle, unsigned long attrs)
> -{
> - if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
> - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> -
> - vunmap(vaddr);
> - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> - }
> -}
> -
> -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
> - dma_addr_t dma_addr)
> -{
> - return __phys_to_pfn(dma_to_phys(dev, dma_addr));
> -}
> -
> pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
> unsigned long attrs)
> {
> @@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
> __dma_unmap_area(phys_to_virt(paddr), size, dir);
> }
>
> +void arch_dma_prep_coherent(struct page *page, size_t size)
> +{
> + __dma_flush_area(page_address(page), size);
> +}
> +
> #ifdef CONFIG_IOMMU_DMA
> static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
> struct page *page, size_t size)
> @@ -191,67 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
> }
> #endif /* CONFIG_IOMMU_DMA */
>
> -static int __init atomic_pool_init(void)
> -{
> - pgprot_t prot = __pgprot(PROT_NORMAL_NC);
> - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> - struct page *page;
> - void *addr;
> - unsigned int pool_size_order = get_order(atomic_pool_size);
> -
> - if (dev_get_cma_area(NULL))
> - page = dma_alloc_from_contiguous(NULL, nr_pages,
> - pool_size_order, false);
> - else
> - page = alloc_pages(GFP_DMA32, pool_size_order);
> -
> - if (page) {
> - int ret;
> - void *page_addr = page_address(page);
> -
> - memset(page_addr, 0, atomic_pool_size);
> - __dma_flush_area(page_addr, atomic_pool_size);
> -
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> - goto free_page;
> -
> - addr = dma_common_contiguous_remap(page, atomic_pool_size,
> - VM_USERMAP, prot, atomic_pool_init);
> -
> - if (!addr)
> - goto destroy_genpool;
> -
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> - page_to_phys(page),
> - atomic_pool_size, -1);
> - if (ret)
> - goto remove_mapping;
> -
> - gen_pool_set_algo(atomic_pool,
> - gen_pool_first_fit_order_align,
> - NULL);
> -
> - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> - atomic_pool_size / 1024);
> - return 0;
> - }
> - goto out;
> -
> -remove_mapping:
> - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
> -destroy_genpool:
> - gen_pool_destroy(atomic_pool);
> - atomic_pool = NULL;
> -free_page:
> - if (!dma_release_from_contiguous(NULL, page, nr_pages))
> - __free_pages(page, pool_size_order);
> -out:
> - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> - atomic_pool_size / 1024);
> - return -ENOMEM;
> -}
> -
> /********************************************
> * The following APIs are for dummy DMA ops *
> ********************************************/
> @@ -350,8 +187,7 @@ static int __init arm64_dma_init(void)
> TAINT_CPU_OUT_OF_SPEC,
> "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> ARCH_DMA_MINALIGN, cache_line_size());
> -
> - return atomic_pool_init();
> + return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> }
> arch_initcall(arm64_dma_init);
>
> @@ -397,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> page = alloc_pages(gfp, get_order(size));
> addr = page ? page_address(page) : NULL;
> } else {
> - addr = __alloc_from_pool(size, &page, gfp);
> + addr = dma_alloc_from_pool(size, &page, gfp);
> }
> if (!addr)
> return NULL;
> @@ -407,7 +243,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> if (coherent)
> __free_pages(page, get_order(size));
> else
> - __free_from_pool(addr, size);
> + dma_free_from_pool(addr, size);
> addr = NULL;
> }
> } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> @@ -471,9 +307,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> * coherent devices.
> * Hence how dodgy the below logic looks...
> */
> - if (__in_atomic_pool(cpu_addr, size)) {
> + if (dma_in_atomic_pool(cpu_addr, size)) {
> iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
> - __free_from_pool(cpu_addr, size);
> + dma_free_from_pool(cpu_addr, size);
> } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> struct page *page = vmalloc_to_page(cpu_addr);
>
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 15bd41447025..56ed94b99963 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -455,6 +455,11 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
> const void *caller);
> void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
>
> +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
> +bool dma_in_atomic_pool(void *start, size_t size);
> +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
> +bool dma_free_from_pool(void *start, size_t size);
> +
> /**
> * dma_mmap_attrs - map a coherent DMA allocation into user space
> * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
> diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
> index 9051b055beec..306557331d7d 100644
> --- a/include/linux/dma-noncoherent.h
> +++ b/include/linux/dma-noncoherent.h
> @@ -69,4 +69,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev)
> }
> #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
>
> +void arch_dma_prep_coherent(struct page *page, size_t size);
> +
> #endif /* _LINUX_DMA_NONCOHERENT_H */
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index c92e08173ed8..fb045ebb0713 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -55,3 +55,9 @@ config SWIOTLB
> config DMA_REMAP
> depends on MMU
> bool
> +
> +config DMA_DIRECT_REMAP
> + bool
> + depends on DMA_DIRECT_OPS
> + select DMA_REMAP
> +
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> index 456f7cc3414d..bc42766f52df 100644
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -1,8 +1,13 @@
> // SPDX-License-Identifier: GPL-2.0
> /*
> + * Copyright (C) 2012 ARM Ltd.
> * Copyright (c) 2014 The Linux Foundation
> */
> -#include <linux/dma-mapping.h>
> +#include <linux/dma-direct.h>
> +#include <linux/dma-noncoherent.h>
> +#include <linux/dma-contiguous.h>
> +#include <linux/init.h>
> +#include <linux/genalloc.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> @@ -86,3 +91,154 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
> unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
> vunmap(cpu_addr);
> }
> +
> +#ifdef CONFIG_DMA_DIRECT_REMAP
> +static struct gen_pool *atomic_pool __ro_after_init;
> +
> +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +
> +static int __init early_coherent_pool(char *p)
> +{
> + atomic_pool_size = memparse(p, &p);
> + return 0;
> +}
> +early_param("coherent_pool", early_coherent_pool);
> +
> +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
> +{
> + unsigned int pool_size_order = get_order(atomic_pool_size);
> + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> + struct page *page;
> + void *addr;
> + int ret;
> +
> + if (dev_get_cma_area(NULL))
> + page = dma_alloc_from_contiguous(NULL, nr_pages,
> + pool_size_order, false);
> + else
> + page = alloc_pages(gfp, pool_size_order);
> + if (!page)
> + goto out;
> +
> + memset(page_address(page), 0, atomic_pool_size);
Note that this won't work if 'page' is a highmem page - should there
be a check for that, or a check for the gfp flags?
Also, is this memset() actually useful, or a waste of cycles - when we
allocate from this pool (see dma_alloc_from_pool()), we always memset()
the buffer.
> + arch_dma_prep_coherent(page, atomic_pool_size);
> +
> + atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> + if (!atomic_pool)
> + goto free_page;
> +
> + addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
> + prot, __builtin_return_address(0));
> + if (!addr)
> + goto destroy_genpool;
> +
> + ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> + page_to_phys(page), atomic_pool_size, -1);
> + if (ret)
> + goto remove_mapping;
> + gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> +
> + pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> + atomic_pool_size / 1024);
> + return 0;
> +
> +remove_mapping:
> + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
> +destroy_genpool:
> + gen_pool_destroy(atomic_pool);
> + atomic_pool = NULL;
> +free_page:
> + if (!dma_release_from_contiguous(NULL, page, nr_pages))
> + __free_pages(page, pool_size_order);
> +out:
> + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> + atomic_pool_size / 1024);
> + return -ENOMEM;
> +}
> +
> +bool dma_in_atomic_pool(void *start, size_t size)
> +{
> + return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> +}
> +
> +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> +{
> + unsigned long val;
> + void *ptr = NULL;
> +
> + if (!atomic_pool) {
> + WARN(1, "coherent pool not initialised!\n");
> + return NULL;
> + }
> +
> + val = gen_pool_alloc(atomic_pool, size);
> + if (val) {
> + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> +
> + *ret_page = phys_to_page(phys);
> + ptr = (void *)val;
> + memset(ptr, 0, size);
> + }
> +
> + return ptr;
> +}
> +
> +bool dma_free_from_pool(void *start, size_t size)
> +{
> + if (!dma_in_atomic_pool(start, size))
> + return false;
> + gen_pool_free(atomic_pool, (unsigned long)start, size);
> + return true;
> +}
> +
> +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> + gfp_t flags, unsigned long attrs)
> +{
> + struct page *page = NULL;
> + void *ret, *kaddr;
> +
> + size = PAGE_ALIGN(size);
> +
> + if (!gfpflags_allow_blocking(flags)) {
> + ret = dma_alloc_from_pool(size, &page, flags);
> + if (!ret)
> + return NULL;
> + *dma_handle = phys_to_dma(dev, page_to_phys(page));
> + return ret;
> + }
> +
> + kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> + if (!kaddr)
> + return NULL;
> + page = virt_to_page(kaddr);
> +
> + /* remove any dirty cache lines on the kernel alias */
> + arch_dma_prep_coherent(page, size);
> +
> + /* create a coherent mapping */
> + ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
> + arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
> + __builtin_return_address(0));
> + if (!ret)
> + dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
> + return ret;
> +}
> +
> +void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> + dma_addr_t dma_handle, unsigned long attrs)
> +{
> + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> +
> + vunmap(vaddr);
> + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> + }
> +}
> +
> +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
> + dma_addr_t dma_addr)
> +{
> + return __phys_to_pfn(dma_to_phys(dev, dma_addr));
> +}
> +#endif /* CONFIG_DMA_DIRECT_REMAP */
> --
> 2.19.1
>
>
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
> > +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
> > +{
> > + unsigned int pool_size_order = get_order(atomic_pool_size);
> > + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> > + struct page *page;
> > + void *addr;
> > + int ret;
> > +
> > + if (dev_get_cma_area(NULL))
> > + page = dma_alloc_from_contiguous(NULL, nr_pages,
> > + pool_size_order, false);
> > + else
> > + page = alloc_pages(gfp, pool_size_order);
> > + if (!page)
> > + goto out;
> > +
> > + memset(page_address(page), 0, atomic_pool_size);
>
> Note that this won't work if 'page' is a highmem page - should there
> be a check for that, or a check for the gfp flags?
>
> Also, is this memset() actually useful, or a waste of cycles - when we
> allocate from this pool (see dma_alloc_from_pool()), we always memset()
> the buffer.
Currently there is no user that supports highmem, but yes, the memset
should probably simply be removed.
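Assuming the memset is simply dropped, the start of dma_atomic_pool_init()
would then look roughly like this (a sketch, not the committed fix):

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order, false);
	else
		page = alloc_pages(gfp, pool_size_order);
	if (!page)
		goto out;

	/*
	 * No memset() of the whole pool here: it would not work on a
	 * highmem page, and dma_alloc_from_pool() already zeroes every
	 * buffer it hands out.
	 */
	arch_dma_prep_coherent(page, atomic_pool_size);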
On Tue, Dec 04, 2018 at 09:38:02AM +0100, Marek Szyprowski wrote:
> Hi All,
>
> On 2018-11-30 20:05, Robin Murphy wrote:
> > On 05/11/2018 12:19, Christoph Hellwig wrote:
> >> By using __dma_direct_alloc_pages we can deal entirely with struct page
> >> instead of having to derive a kernel virtual address.
> >
> > Simple enough :)
> >
> > Reviewed-by: Robin Murphy <[email protected]>
>
> This patch landed in linux-next yesterday and I've noticed that it
> breaks the operation of many drivers. The change looked simple, but a
> stupid bug managed to slip into the code. After a short investigation I
> noticed that __dma_direct_alloc_pages() doesn't set dma_handle and
> doesn't zero the allocated memory, while dma_direct_alloc_pages() did
> both. The other difference is the lack of set_memory_decrypted() handling.
>
> The following patch fixes the issue, but maybe it would be better to fix
> it in kernel/dma/direct.c:
Thanks for spotting this, Marek. Can you send the patch below with a
sign-off and a changelog so that I can queue it up?
Commit bfd56cd60521 ("dma-mapping: support highmem in the generic remap
allocator") replaced dma_direct_alloc_pages() with __dma_direct_alloc_pages(),
which doesn't set dma_handle and doesn't zero the allocated memory. Fix it
by doing both directly in the caller.
Fixes: bfd56cd60521 ("dma-mapping: support highmem in the generic remap allocator")
Signed-off-by: Marek Szyprowski <[email protected]>
---
kernel/dma/remap.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 68a64e3ff6a1..8a44317cfc1a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -223,8 +223,14 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
- if (!ret)
+ if (!ret) {
__dma_direct_free_pages(dev, size, page);
+ return ret;
+ }
+
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ memset(ret, 0, size);
+
return ret;
}
--
2.17.1
On Wed, Dec 05, 2018 at 11:14:01AM +0100, Marek Szyprowski wrote:
> Commit bfd56cd60521 ("dma-mapping: support highmem in the generic remap
> allocator") replaced dma_direct_alloc_pages() with __dma_direct_alloc_pages(),
> which doesn't set dma_handle and doesn't zero the allocated memory. Fix it
> by doing both directly in the caller.
>
> Fixes: bfd56cd60521 ("dma-mapping: support highmem in the generic remap allocator")
> Signed-off-by: Marek Szyprowski <[email protected]>
> ---
> kernel/dma/remap.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
Tested-by: Thierry Reding <[email protected]>
Thanks, applied.