Hi all,
this series contains just the cleanup parts from the previous
"a saner API for allocating DMA addressable pages" series. The
intent is to get this in to reduce the amount of patchbombing
for iterations of the real API work.
Add back a hook to optimize dcache flushing after reading executable
code using DMA. This gets ia64 out of the business of pretending to
be DMA incoherent just for this optimization.
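To illustrate the call path (hypothetical consumer code, not part of
this patch): a driver that DMAs executable code into a buffer and then
syncs it for the CPU now ends up in arch_dma_mark_clean() on the
dma-direct path, so ia64 can skip the lazy flush when the pages are
later mapped into an executable vma:

	dma_addr_t handle;

	handle = dma_map_single(dev, buf, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;
	/* ... device writes executable code into buf ... */
	dma_sync_single_for_cpu(dev, handle, size, DMA_FROM_DEVICE);
	dma_unmap_single(dev, handle, size, DMA_FROM_DEVICE);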
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/ia64/Kconfig | 3 +--
arch/ia64/kernel/dma-mapping.c | 14 +-------------
arch/ia64/mm/init.c | 3 +--
include/linux/dma-direct.h | 3 +++
include/linux/dma-noncoherent.h | 8 ++++++++
kernel/dma/Kconfig | 6 ++++++
kernel/dma/direct.c | 3 +++
7 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 5b4ec80bf5863a..513ba0c5d33610 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,6 +8,7 @@ menu "Processor type and features"
config IA64
bool
+ select ARCH_HAS_DMA_MARK_CLEAN
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ACPI
@@ -32,8 +33,6 @@ config IA64
select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_VIRT_CPU_ACCOUNTING
- select DMA_NONCOHERENT_MMAP
- select ARCH_HAS_SYNC_DMA_FOR_CPU
select VIRT_TO_BUS
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index 09ef9ce9988d1f..f640ed6fe1d576 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dma-direct.h>
+#include <linux/dma-mapping.h>
#include <linux/export.h>
/* Set this to 1 if there is a HW IOMMU in the system */
@@ -7,15 +7,3 @@ int iommu_detected __read_mostly;
const struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
-
-void *arch_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
-{
- return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs)
-{
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
-}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0b3fb4c7af2920..02e5aa08294ee0 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -73,8 +73,7 @@ __ia64_sync_icache_dcache (pte_t pte)
* DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
* flush them when they get mapped into an executable vm-area.
*/
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
- enum dma_data_direction dir)
+void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
{
unsigned long pfn = PHYS_PFN(paddr);
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 6e87225600ae35..95e3e28bd93f47 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -150,6 +150,9 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
if (unlikely(is_swiotlb_buffer(paddr)))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(paddr, size);
}
static inline dma_addr_t dma_direct_map_page(struct device *dev,
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index ca09a4e07d2d3d..b9bc6c557ea46f 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -108,6 +108,14 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size)
}
#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */
+#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
+void arch_dma_mark_clean(phys_addr_t paddr, size_t size);
+#else
+static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
+{
+}
+#endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */
+
void *arch_dma_set_uncached(void *addr, size_t size);
void arch_dma_clear_uncached(void *addr, size_t size);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index e7b801649f6574..281785feb874db 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -44,6 +44,12 @@ config ARCH_HAS_DMA_SET_MASK
config ARCH_HAS_DMA_WRITE_COMBINE
bool
+#
+# Select if the architecture provides the arch_dma_mark_clean hook
+#
+config ARCH_HAS_DMA_MARK_CLEAN
+ bool
+
config DMA_DECLARE_COHERENT
bool
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index db6ef07aec3b37..949c1cbf08b2d5 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -345,6 +345,9 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (unlikely(is_swiotlb_buffer(paddr)))
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
SYNC_FOR_CPU);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(paddr, sg->length);
}
if (!dev_is_dma_coherent(dev))
--
2.28.0
Move the detailed gfp_t setup from __dma_direct_alloc_pages into the
caller to clean things up a little.
Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/dma/direct.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 1d564bea58571b..12e9f5f75dfe4b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -108,7 +108,7 @@ static inline bool dma_should_free_from_pool(struct device *dev,
}
static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
- gfp_t gfp, unsigned long attrs)
+ gfp_t gfp)
{
int node = dev_to_node(dev);
struct page *page = NULL;
@@ -116,11 +116,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
WARN_ON_ONCE(!PAGE_ALIGNED(size));
- if (attrs & DMA_ATTR_NO_WARN)
- gfp |= __GFP_NOWARN;
-
- /* we always manually zero the memory once we are done: */
- gfp &= ~__GFP_ZERO;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
@@ -164,6 +159,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
size = PAGE_ALIGN(size);
+ if (attrs & DMA_ATTR_NO_WARN)
+ gfp |= __GFP_NOWARN;
if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
u64 phys_mask;
@@ -177,7 +174,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
goto done;
}
- page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
+ /* we always manually zero the memory once we are done */
+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
if (!page)
return NULL;
--
2.28.0
Driver that select DMA_OPS need to depend on HAS_DMA support to
work. The vop driver was missing that dependency, so add it, and also
add a nother depends in DMA_OPS itself. That won't fix the issue due
to how the Kconfig dependencies work, but at least produce a warning
about unmet dependencies.
Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/misc/mic/Kconfig | 1 +
kernel/dma/Kconfig | 1 +
2 files changed, 2 insertions(+)
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index b9bb086785db48..8a7c2c5711d5f4 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -35,6 +35,7 @@ config SCIF_BUS
config VOP_BUS
tristate "VOP Bus Driver"
+ depends on HAS_DMA
select DMA_OPS
help
This option is selected by any driver which registers a
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 0ddfb5510fe45f..e7b801649f6574 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -9,6 +9,7 @@ config HAS_DMA
default y
config DMA_OPS
+ depends on HAS_DMA
bool
#
--
2.28.0
There is no harm in just always clearing the SME encryption bit, while
significantly simplifying the interface.
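For reference, the SME helpers are plain mask operations and
sme_me_mask is zero unless AMD SME is active, so the unconditional
clear is a no-op everywhere else (simplified sketch of the helpers
from include/linux/mem_encrypt.h):

	#define __sme_set(x)	((x) | sme_me_mask)
	#define __sme_clr(x)	((x) & ~sme_me_mask)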
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/include/asm/dma-direct.h | 2 +-
arch/mips/bmips/dma.c | 2 +-
arch/mips/cavium-octeon/dma-octeon.c | 2 +-
arch/mips/include/asm/dma-direct.h | 2 +-
arch/mips/loongson2ef/fuloong-2e/dma.c | 2 +-
arch/mips/loongson2ef/lemote-2f/dma.c | 2 +-
arch/mips/loongson64/dma.c | 2 +-
arch/mips/pci/pci-ar2315.c | 2 +-
arch/mips/pci/pci-xtalk-bridge.c | 2 +-
arch/mips/sgi-ip32/ip32-dma.c | 2 +-
arch/powerpc/include/asm/dma-direct.h | 2 +-
include/linux/dma-direct.h | 14 +++++---------
kernel/dma/direct.c | 6 +-----
13 files changed, 17 insertions(+), 25 deletions(-)
diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index 7c3001a6a775bf..a8cee87a93e8ab 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -8,7 +8,7 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
}
-static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
unsigned int offset = dev_addr & ~PAGE_MASK;
return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
index df56bf4179e347..ba2a5d33dfd3fa 100644
--- a/arch/mips/bmips/dma.c
+++ b/arch/mips/bmips/dma.c
@@ -52,7 +52,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa)
return pa;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
struct bmips_dma_range *r;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 14ea680d180e07..388b13ba2558c2 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -177,7 +177,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
#ifdef CONFIG_PCI
if (dev && dev_is_pci(dev))
diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h
index 14e352651ce946..8e178651c638c2 100644
--- a/arch/mips/include/asm/dma-direct.h
+++ b/arch/mips/include/asm/dma-direct.h
@@ -3,6 +3,6 @@
#define _MIPS_DMA_DIRECT_H 1
dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
#endif /* _MIPS_DMA_DIRECT_H */
diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c
index e122292bf6660a..83fadeb3fd7d56 100644
--- a/arch/mips/loongson2ef/fuloong-2e/dma.c
+++ b/arch/mips/loongson2ef/fuloong-2e/dma.c
@@ -6,7 +6,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr | 0x80000000;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr & 0x7fffffff;
}
diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c
index abf0e39d7e4696..302b43a14eee74 100644
--- a/arch/mips/loongson2ef/lemote-2f/dma.c
+++ b/arch/mips/loongson2ef/lemote-2f/dma.c
@@ -6,7 +6,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr | 0x80000000;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
if (dma_addr > 0x8fffffff)
return dma_addr;
diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c
index dbfe6e82fddd1c..b3dc5d0bd2b113 100644
--- a/arch/mips/loongson64/dma.c
+++ b/arch/mips/loongson64/dma.c
@@ -13,7 +13,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return ((nid << 44) ^ paddr) | (nid << node_id_offset);
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
/* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from
* Loongson-3's 48bit address space and embed it into 40bit */
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index 490953f515282a..d88395684f487d 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -175,7 +175,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr + ar2315_dev_offset(dev);
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr - ar2315_dev_offset(dev);
}
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index 9b3cc775c55e05..f1b37f32b55395 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -33,7 +33,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return bc->baddr + paddr;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr & ~(0xffUL << 56);
}
diff --git a/arch/mips/sgi-ip32/ip32-dma.c b/arch/mips/sgi-ip32/ip32-dma.c
index fa7b17cb53853e..160317294d97a9 100644
--- a/arch/mips/sgi-ip32/ip32-dma.c
+++ b/arch/mips/sgi-ip32/ip32-dma.c
@@ -27,7 +27,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return dma_addr;
}
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
phys_addr_t paddr = dma_addr & RAM_OFFSET_MASK;
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index abc154d784b078..95b09313d2a4cf 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -7,7 +7,7 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr + dev->archdata.dma_offset;
}
-static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
return daddr - dev->archdata.dma_offset;
}
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 20eceb2e4f91f8..f00e262ab6b154 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -24,11 +24,12 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
}
-static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
- phys_addr_t paddr = (phys_addr_t)dev_addr;
+ phys_addr_t paddr = (phys_addr_t)dev_addr +
+ ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
- return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+ return __sme_clr(paddr);
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
@@ -44,7 +45,7 @@ static inline bool force_dma_unencrypted(struct device *dev)
/*
* If memory encryption is supported, phys_to_dma will set the memory encryption
* bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma
- * and __dma_to_phys versions should only be used on non-encrypted memory for
+ * version should only be used on non-encrypted memory for
* special occasions like DMA coherent buffers.
*/
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
@@ -52,11 +53,6 @@ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
return __sme_set(__phys_to_dma(dev, paddr));
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
- return __sme_clr(__dma_to_phys(dev, daddr));
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
bool is_ram)
{
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 57a6e7d7cf8f16..bfb479c8a370fa 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -48,11 +48,6 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
{
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
- if (force_dma_unencrypted(dev))
- *phys_limit = __dma_to_phys(dev, dma_limit);
- else
- *phys_limit = dma_to_phys(dev, dma_limit);
-
/*
* Optimistically try the zone that the physical address mask falls
* into first. If that returns memory that isn't actually addressable
@@ -61,6 +56,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
* Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
* zones.
*/
+ *phys_limit = dma_to_phys(dev, dma_limit);
if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
return GFP_DMA;
if (*phys_limit <= DMA_BIT_MASK(32))
--
2.28.0
Just merge these helpers into the main dma_direct_{alloc,free} routines,
as the additional checks are always false for the two callers.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/kernel/amd_gart_64.c | 6 +++---
include/linux/dma-direct.h | 4 ----
kernel/dma/direct.c | 39 ++++++++++++++---------------------
kernel/dma/pool.c | 2 +-
4 files changed, 19 insertions(+), 32 deletions(-)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index bccc5357bffd6c..153374b996a279 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -467,7 +467,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
{
void *vaddr;
- vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs);
+ vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
if (!vaddr ||
!force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
return vaddr;
@@ -479,7 +479,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
goto out_free;
return vaddr;
out_free:
- dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
@@ -489,7 +489,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static int no_agp;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 95e3e28bd93f47..20eceb2e4f91f8 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -77,10 +77,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
-void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs);
int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 949c1cbf08b2d5..1d564bea58571b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -151,13 +151,18 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
return page;
}
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct page *page;
void *ret;
int err;
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_alloc_need_uncached(dev, attrs))
+ return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
+
size = PAGE_ALIGN(size);
if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
@@ -256,11 +261,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
return NULL;
}
-void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs)
+void dma_direct_free(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int page_order = get_order(size);
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_alloc_need_uncached(dev, attrs)) {
+ arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
+ return;
+ }
+
/* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
if (dma_should_free_from_pool(dev, attrs) &&
dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
@@ -284,27 +296,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
}
-void *dma_direct_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
-{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
- return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-}
-
-void dma_direct_free(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
-{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
- else
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
-}
-
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
defined(CONFIG_SWIOTLB)
void dma_direct_sync_sg_for_device(struct device *dev,
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 1281c0f0442bc5..fe11643ff9cc7b 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -115,7 +115,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
#endif
/*
* Memory in the atomic DMA pools must be unencrypted, the pools do not
- * shrink so no re-encryption occurs in dma_direct_free_pages().
+ * shrink so no re-encryption occurs in dma_direct_free().
*/
ret = set_memory_decrypted((unsigned long)page_to_virt(page),
1 << order);
--
2.28.0
The jazzdma ops implement support for a very basic IOMMU. Thus we really
should not use the dma-direct code that takes physical address limits
into account. This survived through the great MIPS DMA ops cleanup mostly
because I was lazy, but now it is time to fully split the implementations.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Thomas Bogendoerfer <[email protected]>
---
arch/mips/jazz/jazzdma.c | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index fe40dbed04c1d6..d0b5a2ba2b1a8a 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -16,7 +16,6 @@
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
-#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <asm/mipsregs.h>
#include <asm/jazz.h>
@@ -492,26 +491,38 @@ int vdma_get_enable(int channel)
static void *jazz_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
+ struct page *page;
void *ret;
- ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
- if (!ret)
- return NULL;
+ if (attrs & DMA_ATTR_NO_WARN)
+ gfp |= __GFP_NOWARN;
- *dma_handle = vdma_alloc(virt_to_phys(ret), size);
- if (*dma_handle == DMA_MAPPING_ERROR) {
- dma_direct_free_pages(dev, size, ret, *dma_handle, attrs);
+ size = PAGE_ALIGN(size);
+ page = alloc_pages(gfp, get_order(size));
+ if (!page)
return NULL;
- }
+ ret = page_address(page);
+ *dma_handle = vdma_alloc(virt_to_phys(ret), size);
+ if (*dma_handle == DMA_MAPPING_ERROR)
+ goto out_free_pages;
+
+ if (attrs & DMA_ATTR_NON_CONSISTENT)
+ return ret;
+ arch_dma_prep_coherent(page, size);
+ return (void *)(UNCAC_BASE + __pa(ret));
- return ret;
+out_free_pages:
+ __free_pages(page, get_order(size));
+ return NULL;
}
static void jazz_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
vdma_free(dma_handle);
- dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs);
+ if (!(attrs & DMA_ATTR_NON_CONSISTENT))
+ vaddr = __va(vaddr - UNCAC_BASE);
+ __free_pages(virt_to_page(vaddr), get_order(size));
}
static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page,
@@ -608,7 +619,6 @@ const struct dma_map_ops jazz_dma_ops = {
.sync_single_for_device = jazz_dma_sync_single_for_device,
.sync_sg_for_cpu = jazz_dma_sync_sg_for_cpu,
.sync_sg_for_device = jazz_dma_sync_sg_for_device,
- .dma_supported = dma_direct_supported,
.cache_sync = arch_dma_cache_sync,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
--
2.28.0
Replace the currently open-coded copy.
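For reference, phys_to_dma_direct() in kernel/dma/direct.c already
wraps exactly this branch, roughly:

	static inline dma_addr_t phys_to_dma_direct(struct device *dev,
			phys_addr_t phys)
	{
		if (force_dma_unencrypted(dev))
			return __phys_to_dma(dev, phys);
		return phys_to_dma(dev, phys);
	}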
Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/dma/direct.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 12e9f5f75dfe4b..57a6e7d7cf8f16 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -240,10 +240,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
goto out_encrypt_pages;
}
done:
- if (force_dma_unencrypted(dev))
- *dma_handle = __phys_to_dma(dev, page_to_phys(page));
- else
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return ret;
out_encrypt_pages:
--
2.28.0
Hello!
On 9/8/20 7:47 PM, Christoph Hellwig wrote:
> Driver that select DMA_OPS need to depend on HAS_DMA support to
> work. The vop driver was missing that dependency, so add it, and also
> add a nother depends in DMA_OPS itself. That won't fix the issue due
Another? :-)
> to how the Kconfig dependencies work, but at least produce a warning
> about unmet dependencies.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
[...]
MBR, Sergei
On 2020-09-08 17:47, Christoph Hellwig wrote:
> Replace the currently open-coded copy.
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
[...]
On 2020-09-08 17:47, Christoph Hellwig wrote:
> There is no harm in just always clearing the SME encryption bit, while
> significantly simplifying the interface.
After a 10-minute diversion into "but hang on, force_dma_unencrypted()
is meaningful on PPC and S390 too..." before realising that it all does
just come back to __sme_clr(), which is indeed a no-op for everyone
other than AMD, any simplification of this mess is indeed welcome :)
Unless I've massively misunderstood how SME is supposed to work,
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
[...]
On 2020-09-08 17:47, Christoph Hellwig wrote:
> Move the detailed gfp_t setup from __dma_direct_alloc_pages into the
> caller to clean things up a little.
Other than a mild nitpick that it might be nicer to spend one extra line
to keep both gfp adjustments next to each other,
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
[...]
On 2020-09-08 17:47, Christoph Hellwig wrote:
> Driver that select DMA_OPS need to depend on HAS_DMA support to
> work. The vop driver was missing that dependency, so add it, and also
> add a nother depends in DMA_OPS itself. That won't fix the issue due
> to how the Kconfig dependencies work, but at least produce a warning
> about unmet dependencies.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> drivers/misc/mic/Kconfig | 1 +
> kernel/dma/Kconfig | 1 +
> 2 files changed, 2 insertions(+)
>
> diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
> index b9bb086785db48..8a7c2c5711d5f4 100644
> --- a/drivers/misc/mic/Kconfig
> +++ b/drivers/misc/mic/Kconfig
> @@ -35,6 +35,7 @@ config SCIF_BUS
>
> config VOP_BUS
> tristate "VOP Bus Driver"
> + depends on HAS_DMA
> select DMA_OPS
AFAICS all three of these bus drivers are only proxying a struct
dma_map_ops * pointer around, so if they used the set_dma_ops() helper
they shouldn't even need these selects at all. Only INTEL_MIC_HOST
appears to have a logical dependency on DMA_OPS for actual functionality.
However, I have a vague feeling you might not be fond of those dma_ops
helpers, and I have no great objection to this one-liner as-is, so
(modulo the couple of commit message typos),
Reviewed-by: Robin Murphy <[email protected]>
(of course the hunk below is unquestionably OK)
Robin.
> help
> This option is selected by any driver which registers a
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index 0ddfb5510fe45f..e7b801649f6574 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -9,6 +9,7 @@ config HAS_DMA
> default y
>
> config DMA_OPS
> + depends on HAS_DMA
> bool
>
> #
>
On 2020-09-08 17:47, Christoph Hellwig wrote:
> Just merge these helpers into the main dma_direct_{alloc,free} routines,
> as the additional checks are always false for the two callers.
Reviewed-by: Robin Murphy <[email protected]>
> Signed-off-by: Christoph Hellwig <[email protected]>
[...]
On Thu, Sep 10, 2020 at 10:12:33PM +0800, Hillf Danton wrote:
>
> On Tue, 8 Sep 2020 18:47:49 +0200 Christoph Hellwig wrote:
> >
> > The jazzdma ops implement support for a very basic IOMMU. Thus we really
> > should not use the dma-direct code that takes physical address limits
> > into account. This survived through the great MIPS DMA ops cleanup mostly
> > because I was lazy, but now it is time to fully split the implementations.
>
> A minor change in behavior, do we need to clear the dma buf as the
> dma-direct code does?
Yes, I've fixed this up.
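(Presumably the fixup just zeroes the freshly allocated buffer in
jazz_dma_alloc() to match what the dma-direct code did, e.g.:

	ret = page_address(page);
	memset(ret, 0, size);	/* assumed: zero like dma-direct does */

though the exact change isn't quoted here.)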
On Thu, Sep 10, 2020 at 01:55:37PM +0100, Robin Murphy wrote:
> AFAICS all three of these bus drivers are only proxying a struct
> dma_map_ops * pointer around, so if they used the set_dma_ops() helper they
> shouldn't even need these selects at all. Only INTEL_MIC_HOST appears to
> have a logical dependency on DMA_OPS for actual functionality.
>
> However, I have a vague feeling you might not be fond of those dma_ops
> helpers, and I have no great objection to this one-liner as-is, so (modulo
> the couple of commit message typos),
The problem with these inheritances is that they don't actually work
for the general case.  You'd also need to inherit things like the
dma ranges, the bus limits, etc, etc.  So we need to kill them instead.
That whole mic/vop case is even worse than that, with its weird set
of chained dma ops that seems to implement some kind of device side
iommu that isn't in scope for the DMA API at all.
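For reference, set_dma_ops() only copies the ops pointer, which is why
none of the per-device DMA parameters come along; a simplified sketch
of the existing helper:

	static inline void set_dma_ops(struct device *dev,
			const struct dma_map_ops *dma_ops)
	{
		dev->dma_ops = dma_ops;
	}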
On Tue, Sep 08, 2020 at 09:04:17PM +0300, Sergei Shtylyov wrote:
> Hello!
>
> On 9/8/20 7:47 PM, Christoph Hellwig wrote:
>
> > Driver that select DMA_OPS need to depend on HAS_DMA support to
> > work. The vop driver was missing that dependency, so add it, and also
> > add a nother depends in DMA_OPS itself. That won't fix the issue due
>
> Another? :-)
Yes, fixed.
On Thu, Sep 10, 2020 at 02:26:03PM +0100, Robin Murphy wrote:
> On 2020-09-08 17:47, Christoph Hellwig wrote:
>> There is no harm in just always clearing the SME encryption bit, while
>> significantly simplifying the interface.
>
> After a 10-minute diversion into "but hang on, force_dma_unencrypted() is
> meaningful on PPC and S390 too..." before realising that it all does just
> come back to __sme_clr(), which is indeed a no-op for everyone other than
> AMD, any simplification of this mess is indeed welcome :)
>
> Unless I've massively misunderstood how SME is supposed to work,
Exactly. This weird encryption bit in AMD SME causes all kinds of harm,
and I'm glad no one picked it up. I've also been wondering if we
should change the interface to explicitly set/clear the bit, but I'll
leave that for another pass as fixing up the SME interfaces would turn
into a massive digression.
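A purely hypothetical sketch of what such an explicit interface could
look like (names made up for illustration):

	dma_addr_t phys_to_dma_encrypted(struct device *dev, phys_addr_t paddr);
	dma_addr_t phys_to_dma_unencrypted(struct device *dev, phys_addr_t paddr);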