Hi all,
this series switches the x86 code to the dma-direct implementation
for direct (non-iommu) dma and the generic swiotlb ops. This includes
getting rid of the special ops for the AMD memory encryption case and
the STA2x11 SoC. The generic implementations are based on the x86
code, so they provide the same functionality.
Changes since V2:
- minor changelog tweaks
- made is_sta2x11 a plain bool
Changes since V1:
- fix the length in the set_memory_decrypted call
- fix the SEV case
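With these stubs in place, a usage sketch (hypothetical caller and function
name, not part of this patch) shows that common code can now call the
helpers unconditionally, without an #ifdef on CONFIG_ARCH_HAS_MEM_ENCRYPT:

#include <linux/set_memory.h>

static int example_decrypt_buffer(void *vaddr, size_t size)
{
	/* compiles to a successful no-op without ARCH_HAS_MEM_ENCRYPT */
	return set_memory_decrypted((unsigned long)vaddr,
				    PAGE_ALIGN(size) >> PAGE_SHIFT);
}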
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
include/linux/set_memory.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e5140648f638..da5178216da5 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
+#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
+static inline int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ return 0;
+}
+
+static inline int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ return 0;
+}
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
#endif /* _LINUX_SET_MEMORY_H_ */
--
2.14.2
Unused now that everyone uses swiotlb_{alloc,free}.
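For anyone converting remaining callers, the replacement is roughly the
following sketch (return types and leading arguments are taken from the
prototypes in the hunk below; the trailing gfp/attrs arguments are assumed):

	/* before */
	vaddr = swiotlb_alloc_coherent(hwdev, size, &dma_handle, GFP_KERNEL);
	swiotlb_free_coherent(hwdev, size, vaddr, dma_handle);

	/* after */
	vaddr = swiotlb_alloc(hwdev, size, &dma_handle, GFP_KERNEL, 0);
	swiotlb_free(hwdev, size, vaddr, dma_handle, 0);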
Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/swiotlb.h | 8 --------
lib/swiotlb.c | 38 --------------------------------------
2 files changed, 46 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5b1f2a00491c..965be92c33b5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
void swiotlb_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs);
-extern void
-*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags);
-
-extern void
-swiotlb_free_coherent(struct device *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8b06b4485e65..15954b86f09e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
- volatile void *address)
-{
- return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
static bool no_iotlb_memory;
void swiotlb_print_info(void)
@@ -752,28 +745,6 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
return NULL;
}
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
-{
- int order = get_order(size);
- unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
- void *ret;
-
- ret = (void *)__get_free_pages(flags, order);
- if (ret) {
- *dma_handle = swiotlb_virt_to_bus(hwdev, ret);
- if (dma_coherent_ok(hwdev, *dma_handle, size)) {
- memset(ret, 0, size);
- return ret;
- }
- free_pages((unsigned long)ret, order);
- }
-
- return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
static bool swiotlb_free_buffer(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
@@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
return true;
}
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dev_addr)
-{
- if (!swiotlb_free_buffer(hwdev, size, dev_addr))
- free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
int do_panic)
--
2.14.2
The generic dma-direct implementation is now functionally equivalent to
the x86 nommu dma_map implementation, so switch over to using it. That
includes switching various iommu drivers from x86_dma_supported to
dma_direct_supported, which provides the same
functionality.
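For context, dma_direct_supported() boils down to a zone-limit check,
roughly like the sketch below (from my reading of lib/dma-direct.c, not
part of this diff):

int dma_direct_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_ZONE_DMA
	/* masks below the DMA zone limit can't be satisfied directly */
	if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return 0;
#else
	/* without ZONE_DMA we can only promise 32-bit addressable memory */
	if (mask < DMA_BIT_MASK(32))
		return 0;
#endif
	return 1;
}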
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/dma-mapping.h | 8 -----
arch/x86/include/asm/iommu.h | 3 --
arch/x86/kernel/Makefile | 2 +-
arch/x86/kernel/amd_gart_64.c | 7 ++--
arch/x86/kernel/pci-calgary_64.c | 3 +-
arch/x86/kernel/pci-dma.c | 66 +-------------------------------------
arch/x86/kernel/pci-swiotlb.c | 5 ++-
arch/x86/pci/sta2x11-fixup.c | 2 +-
drivers/iommu/amd_iommu.c | 7 ++--
drivers/iommu/intel-iommu.c | 3 +-
11 files changed, 17 insertions(+), 90 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78ec99..10f482beda1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
+ select DMA_DIRECT_OPS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 545bf3721bc0..df9816b385eb 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask);
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d92eb40..baedab8ac538 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
-extern const struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
-int x86_dma_supported(struct device *dev, u64 mask);
-
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..2e8c8a09ecab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y += alternative.o i8253.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486cb06ab..52e3abcf3e70 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
}
__free_pages(page, get_order(size));
} else
- return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
- attrs);
+ return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
return NULL;
}
@@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = {
.alloc = gart_alloc_coherent,
.free = gart_free_coherent,
.mapping_error = gart_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f21815..5647853053bd 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/bitmap.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = {
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
.mapping_error = calgary_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b59820872ec7..db0b88ea8d1b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -18,7 +18,7 @@
static int forbid_dac __read_mostly;
-const struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &dma_direct_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void)
}
}
}
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
-{
- struct page *page;
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- dma_addr_t addr;
-
-again:
- page = NULL;
- /* CMA can be used only in the context which permits sleeping */
- if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size),
- flag);
- if (page) {
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dev->coherent_dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
- }
- /* fallback */
- if (!page)
- page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
- if (!page)
- return NULL;
-
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dev->coherent_dma_mask) {
- __free_pages(page, get_order(size));
-
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
- !(flag & GFP_DMA)) {
- flag = (flag & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- return NULL;
- }
- memset(page_address(page), 0, size);
- *dma_addr = addr;
- return page_address(page);
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
-{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct page *page = virt_to_page(vaddr);
-
- if (!dma_release_from_contiguous(dev, page, count))
- free_pages((unsigned long)vaddr, get_order(size));
-}
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
@@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(arch_dma_supported);
-int x86_dma_supported(struct device *dev, u64 mask)
-{
- /* Copied from i386. Doesn't make much sense, because it will
- only work for pci_alloc_coherent.
- The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_BIT_MASK(24))
- return 0;
- return 1;
-}
-
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f34251..bcb6a9bf64ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
*/
flags |= __GFP_NOWARN;
- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
- attrs);
+ vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
if (vaddr)
return vaddr;
@@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
swiotlb_free_coherent(dev, size, vaddr, dma_addr);
else
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static const struct dma_map_ops x86_swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 75577c1490c4..6c712fe11bdc 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = {
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
/* At setup time, we use our own ops if the device is a ConneXt one */
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fdeb773..0bf19423b588 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -28,6 +28,7 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
#include <linux/delay.h>
@@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev)
dev_name(dev));
iommu_ignore_device(dev);
- dev->dma_ops = &nommu_dma_ops;
+ dev->dma_ops = &dma_direct_ops;
goto out;
}
init_iommu_group(dev);
@@ -2680,7 +2681,7 @@ static void free_coherent(struct device *dev, size_t size,
*/
static int amd_iommu_dma_supported(struct device *dev, u64 mask)
{
- if (!x86_dma_supported(dev, mask))
+ if (!dma_direct_supported(dev, mask))
return 0;
return check_device(dev);
}
@@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void)
* continue to be SWIOTLB.
*/
if (!swiotlb)
- dma_ops = &nommu_dma_ops;
+ dma_ops = &dma_direct_ops;
if (amd_iommu_unmap_flush)
pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 582fd01cb7d1..fd899b2a12bb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -45,6 +45,7 @@
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
@@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = {
.unmap_page = intel_unmap_page,
.mapping_error = intel_mapping_error,
#ifdef CONFIG_X86
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
#endif
};
--
2.14.2
We want to phase out looking at the magic GFP_DMA flag in the dma mapping
routines, so switch the gart driver to use the coherent_dma_mask instead,
which is used to select the GFP_DMA flag in the caller.
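The two checks are equivalent because the only place that sets GFP_DMA for
these allocations is the x86 dma_alloc_coherent_gfp_flags() helper (removed
later in this series), which derives the flag from the very same mask:

static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
		gfp |= GFP_DMA;
#ifdef CONFIG_X86_64
	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
		gfp |= GFP_DMA32;
#endif
	return gfp;
}

So "flag & GFP_DMA" is set exactly when coherent_dma_mask is at most
DMA_BIT_MASK(24).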
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/kernel/amd_gart_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 52e3abcf3e70..79ac6caaaabb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
unsigned long align_mask;
struct page *page;
- if (force_iommu && !(flag & GFP_DMA)) {
+ if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
page = alloc_pages(flag | __GFP_ZERO, get_order(size));
if (!page)
--
2.14.2
Use the dma_direct_* helpers and clean up the code flow.
Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/iommu/Kconfig | 1 +
drivers/iommu/intel-iommu.c | 62 ++++++++++++---------------------------------
2 files changed, 17 insertions(+), 46 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc7c1914645d..df171cb85822 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -143,6 +143,7 @@ config DMAR_TABLE
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+ select DMA_DIRECT_OPS
select IOMMU_API
select IOMMU_IOVA
select DMAR_TABLE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fd899b2a12bb..24d1b1b42013 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
@@ -3708,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
- struct page *page = NULL;
- int order;
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- if (!iommu_no_mapping(dev))
- flags &= ~(GFP_DMA | GFP_DMA32);
- else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
- flags |= GFP_DMA;
- else
- flags |= GFP_DMA32;
- }
+ void *vaddr;
- if (gfpflags_allow_blocking(flags)) {
- unsigned int count = size >> PAGE_SHIFT;
+ vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+ if (iommu_no_mapping(dev) || !vaddr)
+ return vaddr;
- page = dma_alloc_from_contiguous(dev, count, order, flags);
- if (page && iommu_no_mapping(dev) &&
- page_to_phys(page) + size > dev->coherent_dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
-
- if (!page)
- page = alloc_pages(flags, order);
- if (!page)
- return NULL;
- memset(page_address(page), 0, size);
-
- *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL,
- dev->coherent_dma_mask);
- if (*dma_handle)
- return page_address(page);
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, order);
+ *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
+ PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
+ dev->coherent_dma_mask);
+ if (!*dma_handle)
+ goto out_free_pages;
+ return vaddr;
+out_free_pages:
+ dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
return NULL;
}
static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- int order;
- struct page *page = virt_to_page(vaddr);
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- intel_unmap(dev, dma_handle, size);
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, order);
+ if (!iommu_no_mapping(dev))
+ intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
+ dma_direct_free(dev, size, vaddr, dma_handle, attrs);
}
static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
--
2.14.2
There were only a few Pentium Pro multiprocessor systems where this
erratum applied. They are more than 20 years old now, and we've slowly
dropped the places where we put the workarounds in and discouraged anyone
from enabling the workaround.
Get rid of it for good.
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/Kconfig.cpu | 13 -------------
arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2 --
arch/x86/include/asm/barrier.h | 30 -----------------------------
arch/x86/include/asm/io.h | 15 ---------------
arch/x86/kernel/pci-nommu.c | 19 ------------------
arch/x86/um/asm/barrier.h | 4 ----
6 files changed, 83 deletions(-)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d2297d486..638411f22267 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-config X86_PPRO_FENCE
- bool "PentiumPro memory ordering errata workaround"
- depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
- ---help---
- Old PentiumPro multiprocessor systems had errata that could cause
- memory operations to violate the x86 ordering standard in rare cases.
- Enabling this option will attempt to work around some (but not all)
- occurrences of this problem, at the cost of much heavier spinlock and
- memory barrier operations.
-
- If unsure, say n here. Even distro kernels should think twice before
- enabling this: there are few systems, and an unlikely bug.
-
config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
#undef CONFIG_OPTIMIZE_INLINING
#endif
-#undef CONFIG_X86_PPRO_FENCE
-
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae9..042b5e892ed1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
"lfence", X86_FEATURE_LFENCE_RDTSC)
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else
#define dma_rmb() barrier()
-#endif
#define dma_wmb() barrier()
#ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define __smp_wmb() barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v) \
-do { \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- WRITE_ONCE(*p, v); \
-} while (0)
-
-#define __smp_load_acquire(p) \
-({ \
- typeof(*p) ___p1 = READ_ONCE(*p); \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- ___p1; \
-})
-
-#else /* regular x86 TSO memory ordering */
-
#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
@@ -107,8 +79,6 @@ do { \
___p1; \
})
-#endif
-
/* Atomic operations are already serializing on x86 */
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd0..f6e5b9375d8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
*/
#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
-/*
- * Cache management
- *
- * This needed for two cases
- * 1. Out of order aware processors
- * 2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
- asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
#endif /* __KERNEL__ */
extern void native_io_delay(void);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e475c6..ac7ea3a8242f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return NOMMU_MAPPING_ERROR;
- flush_write_buffers();
return bus;
}
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return 0;
s->dma_length = s->length;
}
- flush_write_buffers();
return nents;
}
-static void nommu_sync_single_for_device(struct device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sg, int nelems,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
.free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
- .sync_single_for_device = nommu_sync_single_for_device,
- .sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
.mapping_error = nommu_mapping_error,
.dma_supported = x86_dma_supported,
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d73400ea29..f31e5d903161 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
#define dma_rmb() barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
#define dma_wmb() barrier()
#include <asm-generic/barrier.h>
--
2.14.2
This gains support for CMA allocations for the force_iommu case, and
cleans up the code a bit.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/kernel/amd_gart_64.c | 36 ++++++++++++++----------------------
1 file changed, 14 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 79ac6caaaabb..f299d8a479bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,29 +480,21 @@ static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
gfp_t flag, unsigned long attrs)
{
- dma_addr_t paddr;
- unsigned long align_mask;
- struct page *page;
-
- if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- align_mask = (1UL << get_order(size)) - 1;
- paddr = dma_map_area(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL, align_mask);
-
- flush_gart();
- if (paddr != bad_dma_addr) {
- *dma_addr = paddr;
- return page_address(page);
- }
- __free_pages(page, get_order(size));
- } else
- return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ void *vaddr;
+
+ vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ if (!vaddr ||
+ !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+ return vaddr;
+ *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+ DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+ flush_gart();
+ if (unlikely(*dma_addr == bad_dma_addr))
+ goto out_free;
+ return vaddr;
+out_free:
+ dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
--
2.14.2
This cleans up the code a lot by removing duplicate logic.
Signed-off-by: Christoph Hellwig <[email protected]>
Tested-by: Joerg Roedel <[email protected]>
Acked-by: Joerg Roedel <[email protected]>
---
drivers/iommu/Kconfig | 1 +
drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
2 files changed, 22 insertions(+), 47 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..dc7c1914645d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,6 +107,7 @@ config IOMMU_PGTABLES_L2
# AMD IOMMU support
config AMD_IOMMU
bool "AMD IOMMU support"
+ select DMA_DIRECT_OPS
select SWIOTLB
select PCI_MSI
select PCI_ATS
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0bf19423b588..83819d0cbf90 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2600,51 +2600,32 @@ static void *alloc_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
u64 dma_mask = dev->coherent_dma_mask;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL) {
- page = alloc_pages(flag, get_order(size));
- *dma_addr = page_to_phys(page);
- return page_address(page);
- } else if (IS_ERR(domain))
- return NULL;
+ struct protection_domain *domain = get_domain(dev);
+ bool is_direct = false;
+ void *virt_addr;
- dma_dom = to_dma_ops_domain(domain);
- size = PAGE_ALIGN(size);
- dma_mask = dev->coherent_dma_mask;
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- flag |= __GFP_ZERO;
-
- page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
- if (!page) {
- if (!gfpflags_allow_blocking(flag))
- return NULL;
-
- page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), flag);
- if (!page)
+ if (IS_ERR(domain)) {
+ if (PTR_ERR(domain) != -EINVAL)
return NULL;
+ is_direct = true;
}
+ virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ if (!virt_addr || is_direct)
+ return virt_addr;
+
if (!dma_mask)
dma_mask = *dev->dma_mask;
- *dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
- size, DMA_BIDIRECTIONAL, dma_mask);
-
+ *dma_addr = __map_single(dev, to_dma_ops_domain(domain),
+ virt_to_phys(virt_addr), PAGE_ALIGN(size),
+ DMA_BIDIRECTIONAL, dma_mask);
if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
goto out_free;
-
- return page_address(page);
+ return virt_addr;
out_free:
-
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-
+ dma_direct_free(dev, size, virt_addr, *dma_addr, attrs);
return NULL;
}
@@ -2655,24 +2636,17 @@ static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr,
unsigned long attrs)
{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
+ struct protection_domain *domain = get_domain(dev);
- page = virt_to_page(virt_addr);
size = PAGE_ALIGN(size);
- domain = get_domain(dev);
- if (IS_ERR(domain))
- goto free_mem;
+ if (!IS_ERR(domain)) {
+ struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain);
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+ __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+ }
-free_mem:
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
+ dma_direct_free(dev, size, virt_addr, dma_addr, attrs);
}
/*
--
2.14.2
Now that set_memory_decrypted is always available we can just call it
directly.
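The conversion pattern is mechanical; assuming a page-aligned size (which
the old wrapper's WARN already enforced), each call site becomes:

	/* old */
	swiotlb_set_mem_attributes(vaddr, bytes);

	/* new */
	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);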
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/include/asm/mem_encrypt.h | 2 --
arch/x86/mm/mem_encrypt.c | 9 ---------
lib/swiotlb.c | 12 ++++++------
3 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..9da0b63c8fc7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,8 +48,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
bool sme_active(void);
bool sev_active(void);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 66beedc8fe3d..d3b80d5f9828 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -446,15 +446,6 @@ void __init mem_encrypt_init(void)
: "Secure Memory Encryption (SME)");
}
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
- WARN(PAGE_ALIGN(size) != size,
- "size is not page-aligned (%#lx)\n", size);
-
- /* Make the SWIOTLB buffer area decrypted */
- set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
-
struct sme_populate_pgd_data {
void *pgtable_area;
pgd_t *pgd;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..005d1d87bb2e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
/* For swiotlb, clear memory encryption mask from dma addresses */
static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
phys_addr_t address)
@@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(io_tlb_start);
bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
vaddr = phys_to_virt(io_tlb_overflow_buffer);
bytes = PAGE_ALIGN(io_tlb_overflow);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
}
@@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_start = virt_to_phys(tlb);
io_tlb_end = io_tlb_start + bytes;
- swiotlb_set_mem_attributes(tlb, bytes);
+ set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
/*
@@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!v_overflow_buffer)
goto cleanup2;
- swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+ set_memory_decrypted((unsigned long)v_overflow_buffer,
+ io_tlb_overflow >> PAGE_SHIFT);
memset(v_overflow_buffer, 0, io_tlb_overflow);
io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
--
2.14.2
With that in place the generic dma-direct routines can be used to
allocate non-encrypted bounce buffers, and the x86 SEV case can use
the generic swiotlb ops, including nice features such as CMA
allocations.
Note that I'm not too happy about using sev_active() in dma-direct, but
I couldn't come up with a good enough name for a wrapper to make it
worth adding.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 73 ++---------------------------------------------
lib/dma-direct.c | 32 +++++++++++++++++----
2 files changed, 29 insertions(+), 76 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a05bea831a8..65f45e0ef496 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -200,58 +200,6 @@ void __init sme_early_init(void)
swiotlb_force = SWIOTLB_FORCE;
}
-static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- unsigned int order;
- struct page *page;
- void *vaddr = NULL;
-
- order = get_order(size);
- page = alloc_pages_node(dev_to_node(dev), gfp, order);
- if (page) {
- dma_addr_t addr;
-
- /*
- * Since we will be clearing the encryption bit, check the
- * mask with it already cleared.
- */
- addr = __phys_to_dma(dev, page_to_phys(page));
- if ((addr + size) > dev->coherent_dma_mask) {
- __free_pages(page, get_order(size));
- } else {
- vaddr = page_address(page);
- *dma_handle = addr;
- }
- }
-
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
-
- if (!vaddr)
- return NULL;
-
- /* Clear the SME encryption bit for DMA use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
- set_memory_decrypted((unsigned long)vaddr, 1 << order);
- memset(vaddr, 0, PAGE_SIZE << order);
- *dma_handle = __sme_clr(*dma_handle);
- }
-
- return vaddr;
-}
-
-static void sev_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- /* Set the SME encryption bit for re-use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
- set_memory_encrypted((unsigned long)vaddr,
- 1 << get_order(size));
-
- swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-}
-
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
@@ -404,20 +352,6 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
-static const struct dma_map_ops sev_dma_ops = {
- .alloc = sev_alloc,
- .free = sev_free,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
-};
-
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
@@ -428,12 +362,11 @@ void __init mem_encrypt_init(void)
swiotlb_update_mem_attributes();
/*
- * With SEV, DMA operations cannot use encryption. New DMA ops
- * are required in order to mark the DMA areas as decrypted or
- * to use bounce buffers.
+ * With SEV, DMA operations cannot use encryption, we need to use
+ * SWIOTLB to bounce buffer DMA operation.
*/
if (sev_active())
- dma_ops = &sev_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
/*
* With SEV, we need to unroll the rep string I/O instructions.
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c9e8e21cb334..1277d293d4da 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/pfn.h>
+#include <linux/set_memory.h>
#define DIRECT_MAPPING_ERROR 0
@@ -20,6 +21,14 @@
#define ARCH_ZONE_DMA_BITS 24
#endif
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+ return sev_active();
+}
+
static bool
check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
- return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+ dma_addr_t addr = force_dma_unencrypted() ?
+ __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+ return addr + size - 1 <= dev->coherent_dma_mask;
}
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
+ void *ret;
/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,10 +90,15 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
if (!page)
return NULL;
-
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- memset(page_address(page), 0, size);
- return page_address(page);
+ ret = page_address(page);
+ if (force_dma_unencrypted()) {
+ set_memory_decrypted((unsigned long)ret, 1 << page_order);
+ *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+ } else {
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ }
+ memset(ret, 0, size);
+ return ret;
}
/*
@@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned int page_order = get_order(size);
+ if (force_dma_unencrypted())
+ set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
- free_pages((unsigned long)cpu_addr, get_order(size));
+ free_pages((unsigned long)cpu_addr, page_order);
}
static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
--
2.14.2
All dma_ops implementations used on x86 now take care of setting their own
required GFP_ masks for the allocation. And given that the common code
now clears harmful flags itself, we can stop fiddling with the flags in all
the iommu implementations as well.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/include/asm/dma-mapping.h | 11 -----------
arch/x86/kernel/pci-calgary_64.c | 2 --
arch/x86/kernel/pci-dma.c | 2 --
arch/x86/mm/mem_encrypt.c | 7 -------
4 files changed, 22 deletions(-)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index df9816b385eb..89ce4bfd241f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
- if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
- gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
- if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
- gfp |= GFP_DMA32;
-#endif
- return gfp;
-}
-
#endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5647853053bd..bbfc8b1e9104 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -446,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
if (!ret)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index db0b88ea8d1b..14437116ffea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,8 +82,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
if (!*dev)
*dev = &x86_dma_fallback_dev;
- *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 75dc8b525c12..66beedc8fe3d 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -208,13 +208,6 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
void *vaddr = NULL;
order = get_order(size);
-
- /*
- * Memory will be memset to zero after marking decrypted, so don't
- * bother clearing it before.
- */
- gfp &= ~__GFP_ZERO;
-
page = alloc_pages_node(dev_to_node(dev), gfp, order);
if (page) {
dma_addr_t addr;
--
2.14.2
These days all devices (including the ISA fallback device) have a coherent
DMA mask set, so remove the workaround.
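For reference, the fallback device in arch/x86/kernel/pci-dma.c looks
roughly like this (quoted from memory, so treat the exact initializers as
an assumption); note it carries a real ISA-width coherent mask rather than
a zero one:

struct device x86_dma_fallback_dev = {
	.init_name		= "fallback device",
	.coherent_dma_mask	= ISA_DMA_BIT_MASK,	/* DMA_BIT_MASK(24) */
	.dma_mask		= &x86_dma_fallback_dev.coherent_dma_mask,
};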
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/include/asm/dma-mapping.h | 18 ++----------------
arch/x86/kernel/pci-dma.c | 10 ++++------
arch/x86/mm/mem_encrypt.c | 4 +---
drivers/xen/swiotlb-xen.c | 16 +---------------
4 files changed, 8 insertions(+), 40 deletions(-)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6277c83c0eb1..545bf3721bc0 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -44,26 +44,12 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
unsigned long attrs);
-static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
- gfp_t gfp)
-{
- unsigned long dma_mask = 0;
-
- dma_mask = dev->coherent_dma_mask;
- if (!dma_mask)
- dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
- return dma_mask;
-}
-
static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
- unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
-
- if (dma_mask <= DMA_BIT_MASK(24))
+ if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
gfp |= GFP_DMA;
#ifdef CONFIG_X86_64
- if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
+ if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
gfp |= GFP_DMA32;
#endif
return gfp;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df7ab02f959f..b59820872ec7 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -80,13 +80,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
unsigned long attrs)
{
- unsigned long dma_mask;
struct page *page;
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
dma_addr_t addr;
- dma_mask = dma_alloc_coherent_mask(dev, flag);
-
again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
@@ -95,7 +92,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
flag);
if (page) {
addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
+ if (addr + size > dev->coherent_dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
}
@@ -108,10 +105,11 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
return NULL;
addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
+ if (addr + size > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
- if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+ if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+ !(flag & GFP_DMA)) {
flag = (flag & ~GFP_DMA32) | GFP_DMA;
goto again;
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..75dc8b525c12 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -203,12 +203,10 @@ void __init sme_early_init(void)
static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
- unsigned long dma_mask;
unsigned int order;
struct page *page;
void *vaddr = NULL;
- dma_mask = dma_alloc_coherent_mask(dev, gfp);
order = get_order(size);
/*
@@ -226,7 +224,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* mask with it already cleared.
*/
addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
- if ((addr + size) > dma_mask) {
+ if ((addr + size) > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
} else {
vaddr = page_address(page);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 5bb72d3f8337..e1c60899fdbc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -53,20 +53,6 @@
* API.
*/
-#ifndef CONFIG_X86
-static unsigned long dma_alloc_coherent_mask(struct device *dev,
- gfp_t gfp)
-{
- unsigned long dma_mask = 0;
-
- dma_mask = dev->coherent_dma_mask;
- if (!dma_mask)
- dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
- return dma_mask;
-}
-#endif
-
#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0)
static char *xen_io_tlb_start, *xen_io_tlb_end;
@@ -328,7 +314,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return ret;
if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+ dma_mask = hwdev->coherent_dma_mask;
/* At this point dma_handle is the physical address, next we are
* going to set it to the machine address.
--
2.14.2
The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.
Also fix the sta2x11 case. For that SoC the dma map ops need an
additional physical to dma address translation. For swiotlb buffers
that is done through the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that function
returns a direct allocation and not a swiotlb buffer. With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/x86/include/asm/device.h | 3 +++
arch/x86/include/asm/swiotlb.h | 8 -------
arch/x86/kernel/pci-swiotlb.c | 47 +-----------------------------------------
arch/x86/pci/sta2x11-fixup.c | 46 +++++------------------------------------
4 files changed, 9 insertions(+), 95 deletions(-)
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..a8f6c809d9b1 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
+#ifdef CONFIG_STA2X11
+ bool is_sta2x11;
+#endif
};
#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
{
}
#endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
int swiotlb __read_mostly;
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- /*
- * Don't print a warning when the first allocation attempt fails.
- * swiotlb_alloc_coherent() will print a warning when the DMA
- * memory allocation ultimately failed.
- */
- flags |= __GFP_NOWARN;
-
- vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
- if (vaddr)
- return vaddr;
-
- return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
- else
- dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc = x86_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .dma_supported = NULL,
-};
-
/*
* pci_swiotlb_detect_override - set swiotlb to 1 if necessary
*
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
{
if (swiotlb) {
swiotlb_init(0);
- dma_ops = &x86_swiotlb_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
}
}
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
return p;
}
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- * returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
- *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
- return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
- .alloc = sta2x11_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = dma_direct_supported,
-};
-
/* At setup time, we use our own ops if the device is a ConneXt one */
static void sta2x11_setup_pdev(struct pci_dev *pdev)
{
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
return;
pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
- pdev->dev.dma_ops = &sta2x11_dma_ops;
+ pdev->dev.dma_ops = &swiotlb_dma_ops;
+ pdev->dev.archdata.is_sta2x11 = true;
/* We must enable all devices as master, for audio DMA to work */
pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
struct sta2x11_mapping *map;
- if (dev->dma_ops != &sta2x11_dma_ops) {
+ if (!dev->archdata.is_sta2x11) {
if (!dev->dma_mask)
return false;
return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
*/
dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return paddr;
return p2a(paddr, to_pci_dev(dev));
}
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
*/
phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return daddr;
return a2p(daddr, to_pci_dev(dev));
}
--
2.14.2
Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
the memory encryption mask to the non-prefixed versions. Use the
__-prefixed versions directly instead of clearing the mask again in
various places.
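The resulting non-prefixed wrappers in include/linux/dma-direct.h then look
roughly like the sketch below, where __sme_set/__sme_clr apply and strip
the SME encryption mask:

static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	/* addresses handed to devices carry the encryption bit by default */
	return __sme_set(__phys_to_dma(dev, paddr));
}

static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
	/* strip the encryption bit to get back a true physical address */
	return __sme_clr(__dma_to_phys(dev, daddr));
}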
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/include/asm/dma-direct.h | 4 ++--
arch/mips/cavium-octeon/dma-octeon.c | 10 ++++-----
.../include/asm/mach-cavium-octeon/dma-coherence.h | 4 ++--
.../include/asm/mach-loongson64/dma-coherence.h | 10 ++++-----
arch/mips/loongson64/common/dma-swiotlb.c | 4 ++--
arch/powerpc/include/asm/dma-direct.h | 4 ++--
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/dma-direct.h | 25 ++--------------------
arch/x86/mm/mem_encrypt.c | 2 +-
arch/x86/pci/sta2x11-fixup.c | 6 +++---
include/linux/dma-direct.h | 21 ++++++++++++++++--
lib/swiotlb.c | 25 ++++++++--------------
12 files changed, 53 insertions(+), 64 deletions(-)
diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index 5b0a8a421894..b67e5fc1fe43 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,13 +2,13 @@
#ifndef ASM_ARM_DMA_DIRECT_H
#define ASM_ARM_DMA_DIRECT_H 1
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
unsigned int offset = paddr & ~PAGE_MASK;
return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
unsigned int offset = dev_addr & ~PAGE_MASK;
return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c7bb8a407041..7b335ab21697 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -10,7 +10,7 @@
* IP32 changes by Ilya.
* Copyright (C) 2010 Cavium Networks, Inc.
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/bootmem.h>
#include <linux/export.h>
@@ -182,7 +182,7 @@ struct octeon_dma_map_ops {
phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
};
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
struct octeon_dma_map_ops,
@@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
return ops->phys_to_dma(dev, paddr);
}
-EXPORT_SYMBOL(phys_to_dma);
+EXPORT_SYMBOL(__phys_to_dma);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
struct octeon_dma_map_ops,
@@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
return ops->dma_to_phys(dev, daddr);
}
-EXPORT_SYMBOL(dma_to_phys);
+EXPORT_SYMBOL(__dma_to_phys);
static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
.dma_map_ops = {
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index 138edf6b5b48..6eb1ee548b11 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
struct dma_map_ops;
extern const struct dma_map_ops *octeon_pci_dma_map_ops;
diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
index b1b575f5c6c1..64fc44dec0a8 100644
--- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
@@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
size_t size)
{
#ifdef CONFIG_CPU_LOONGSON3
- return phys_to_dma(dev, virt_to_phys(addr));
+ return __phys_to_dma(dev, virt_to_phys(addr));
#else
return virt_to_phys(addr) | 0x80000000;
#endif
@@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
struct page *page)
{
#ifdef CONFIG_CPU_LOONGSON3
- return phys_to_dma(dev, page_to_phys(page));
+ return __phys_to_dma(dev, page_to_phys(page));
#else
return page_to_phys(page) | 0x80000000;
#endif
@@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
dma_addr_t dma_addr)
{
#if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT)
- return dma_to_phys(dev, dma_addr);
+ return __dma_to_phys(dev, dma_addr);
#elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff);
#else
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 7bbcf89475f3..6a739f8ae110 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask)
return swiotlb_dma_supported(dev, mask);
}
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
long nid;
#ifdef CONFIG_PHYS48_TO_HT40
@@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr;
}
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
long nid;
#ifdef CONFIG_PHYS48_TO_HT40
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a5b59c765426..7702875aabb7 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
return paddr + get_dma_offset(dev);
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
return daddr - get_dma_offset(dev);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 10f482beda1b..1ca4f0874517 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
- select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
@@ -692,6 +691,7 @@ config X86_SUPPORTS_MEMORY_FAILURE
config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
+ select ARCH_HAS_PHYS_TO_DMA
select X86_DEV_DMA_OPS
select X86_DMA_REMAP
select SWIOTLB
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
index 1295bc622ebe..1a19251eaac9 100644
--- a/arch/x86/include/asm/dma-direct.h
+++ b/arch/x86/include/asm/dma-direct.h
@@ -2,29 +2,8 @@
#ifndef ASM_X86_DMA_DIRECT_H
#define ASM_X86_DMA_DIRECT_H 1
-#include <linux/mem_encrypt.h>
-
-#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-#else
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
- if (!dev->dma_mask)
- return 0;
-
- return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return __sme_set(paddr);
-}
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
- return __sme_clr(daddr);
-}
-#endif /* CONFIG_X86_DMA_REMAP */
#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d3b80d5f9828..1a05bea831a8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -216,7 +216,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* Since we will be clearing the encryption bit, check the
* mask with it already cleared.
*/
- addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
+ addr = __phys_to_dma(dev, page_to_phys(page));
if ((addr + size) > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
} else {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index eac58e03f43c..7a5bafb76d77 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -207,11 +207,11 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
/**
- * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
+ * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
* @dev: device for a PCI device
* @paddr: Physical address
*/
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
if (!dev->archdata.is_sta2x11)
return paddr;
@@ -223,7 +223,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
* @dev: device for a PCI device
* @daddr: STA2x11 AMBA DMA address
*/
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
if (!dev->archdata.is_sta2x11)
return daddr;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bcdb1a3e4b1f..53ad6a47f513 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -3,18 +3,19 @@
#define _LINUX_DMA_DIRECT_H 1
#include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
#include <asm/dma-direct.h>
#else
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
dma_addr_t dev_addr = (dma_addr_t)paddr;
return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
phys_addr_t paddr = (phys_addr_t)dev_addr;
@@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
+/*
+ * If memory encryption is supported, phys_to_dma will set the memory encryption
+ * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma
+ * and __dma_to_phys versions should only be used on non-encrypted memory for
+ * special occasions like DMA coherent buffers.
+ */
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return __sme_set(__phys_to_dma(dev, paddr));
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return __sme_clr(__dma_to_phys(dev, daddr));
+}
+
#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
void dma_mark_clean(void *addr, size_t size);
#else
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 005d1d87bb2e..8b06b4485e65 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
- phys_addr_t address)
-{
- return __sme_clr(phys_to_dma(hwdev, address));
-}
-
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
@@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
return SWIOTLB_MAP_ERROR;
}
- start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+ start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
dir, attrs);
}
@@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
goto out_warn;
phys_addr = swiotlb_tbl_map_single(dev,
- swiotlb_phys_to_dma(dev, io_tlb_start),
+ __phys_to_dma(dev, io_tlb_start),
0, size, DMA_FROM_DEVICE, 0);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
- *dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+ *dma_handle = __phys_to_dma(dev, phys_addr);
if (dma_coherent_ok(dev, *dma_handle, size))
goto out_unmap;
@@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
map = map_single(dev, phys, size, dir, attrs);
if (map == SWIOTLB_MAP_ERROR) {
swiotlb_full(dev, size, dir, 1);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
- dev_addr = swiotlb_phys_to_dma(dev, map);
+ dev_addr = __phys_to_dma(dev, map);
/* Ensure that the address returned is DMA'ble */
if (dma_capable(dev, dev_addr, size))
@@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
/*
@@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sg_dma_len(sgl) = 0;
return 0;
}
- sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+ sg->dma_address = __phys_to_dma(hwdev, map);
} else
sg->dma_address = dev_addr;
sg_dma_len(sg) = sg->length;
@@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+ return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
/*
@@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+ return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
#ifdef CONFIG_DMA_DIRECT_OPS
--
2.14.2
On 3/19/2018 5:38 AM, Christoph Hellwig wrote:
> Hi all,
>
> this series switches the x86 code to the dma-direct implementation
> for direct (non-iommu) dma and the generic swiotlb ops. This includes
> getting rid of the special ops for the AMD memory encryption case and
> the STA2x11 SOC. The generic implementations are based on the x86
> code, so they provide the same functionality.
>
Booted the system with SME on and off and the IOMMU on, off and in
passthrough mode without issues. In each case, booted an SEV guest
without issues. I'll be able to do more extensive testing later, but
for now...
Tested-by: Tom Lendacky <[email protected]>
On 3/19/2018 5:38 AM, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
> ---
> include/linux/set_memory.h | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
On 3/19/2018 5:38 AM, Christoph Hellwig wrote:
> Now that set_memory_decrypted is always available we can just call it
> directly.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
> ---
> arch/x86/include/asm/mem_encrypt.h | 2 --
> arch/x86/mm/mem_encrypt.c | 9 ---------
> lib/swiotlb.c | 12 ++++++------
> 3 files changed, 6 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
> index 22c5f3e6f820..9da0b63c8fc7 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -48,8 +48,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
> /* Architecture __weak replacement functions */
> void __init mem_encrypt_init(void);
>
> -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
> -
> bool sme_active(void);
> bool sev_active(void);
>
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 66beedc8fe3d..d3b80d5f9828 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -446,15 +446,6 @@ void __init mem_encrypt_init(void)
> : "Secure Memory Encryption (SME)");
> }
>
> -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
> -{
> - WARN(PAGE_ALIGN(size) != size,
> - "size is not page-aligned (%#lx)\n", size);
> -
> - /* Make the SWIOTLB buffer area decrypted */
> - set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
> -}
> -
> struct sme_populate_pgd_data {
> void *pgtable_area;
> pgd_t *pgd;
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index c43ec2271469..005d1d87bb2e 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -31,6 +31,7 @@
> #include <linux/gfp.h>
> #include <linux/scatterlist.h>
> #include <linux/mem_encrypt.h>
> +#include <linux/set_memory.h>
>
> #include <asm/io.h>
> #include <asm/dma.h>
> @@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
> return size ? size : (IO_TLB_DEFAULT_SIZE);
> }
>
> -void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
> -
> /* For swiotlb, clear memory encryption mask from dma addresses */
> static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
> phys_addr_t address)
> @@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
>
> vaddr = phys_to_virt(io_tlb_start);
> bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
> - swiotlb_set_mem_attributes(vaddr, bytes);
> + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> memset(vaddr, 0, bytes);
>
> vaddr = phys_to_virt(io_tlb_overflow_buffer);
> bytes = PAGE_ALIGN(io_tlb_overflow);
> - swiotlb_set_mem_attributes(vaddr, bytes);
> + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> memset(vaddr, 0, bytes);
> }
>
> @@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
> io_tlb_start = virt_to_phys(tlb);
> io_tlb_end = io_tlb_start + bytes;
>
> - swiotlb_set_mem_attributes(tlb, bytes);
> + set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
> memset(tlb, 0, bytes);
>
> /*
> @@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
> if (!v_overflow_buffer)
> goto cleanup2;
>
> - swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
> + set_memory_decrypted((unsigned long)v_overflow_buffer,
> + io_tlb_overflow >> PAGE_SHIFT);
> memset(v_overflow_buffer, 0, io_tlb_overflow);
> io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
>
>
On 3/19/2018 5:38 AM, Christoph Hellwig wrote:
> Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
> the memory encryption mask to the non-prefixed versions. Use the
> __-prefixed versions directly instead of clearing the mask again in
> various places.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
On 3/19/2018 5:38 AM, Christoph Hellwig wrote:
> With that in place the generic dma-direct routines can be used to
> allocate non-encrypted bounce buffers, and the x86 SEV case can use
> the generic swiotlb ops including nice features such as using CMA
> allocations.
>
> Note that I'm not too happy about using sev_active() in dma-direct, but
> I couldn't come up with a good enough name for a wrapper to make it
> worth adding.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
> ---
> arch/x86/mm/mem_encrypt.c | 73 ++---------------------------------------------
> lib/dma-direct.c | 32 +++++++++++++++++----
> 2 files changed, 29 insertions(+), 76 deletions(-)
>
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 1a05bea831a8..65f45e0ef496 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -200,58 +200,6 @@ void __init sme_early_init(void)
> swiotlb_force = SWIOTLB_FORCE;
> }
>
> -static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> - gfp_t gfp, unsigned long attrs)
> -{
> - unsigned int order;
> - struct page *page;
> - void *vaddr = NULL;
> -
> - order = get_order(size);
> - page = alloc_pages_node(dev_to_node(dev), gfp, order);
> - if (page) {
> - dma_addr_t addr;
> -
> - /*
> - * Since we will be clearing the encryption bit, check the
> - * mask with it already cleared.
> - */
> - addr = __phys_to_dma(dev, page_to_phys(page));
> - if ((addr + size) > dev->coherent_dma_mask) {
> - __free_pages(page, get_order(size));
> - } else {
> - vaddr = page_address(page);
> - *dma_handle = addr;
> - }
> - }
> -
> - if (!vaddr)
> - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
> -
> - if (!vaddr)
> - return NULL;
> -
> - /* Clear the SME encryption bit for DMA use if not swiotlb area */
> - if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
> - set_memory_decrypted((unsigned long)vaddr, 1 << order);
> - memset(vaddr, 0, PAGE_SIZE << order);
> - *dma_handle = __sme_clr(*dma_handle);
> - }
> -
> - return vaddr;
> -}
> -
> -static void sev_free(struct device *dev, size_t size, void *vaddr,
> - dma_addr_t dma_handle, unsigned long attrs)
> -{
> - /* Set the SME encryption bit for re-use if not swiotlb area */
> - if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
> - set_memory_encrypted((unsigned long)vaddr,
> - 1 << get_order(size));
> -
> - swiotlb_free_coherent(dev, size, vaddr, dma_handle);
> -}
> -
> static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
> {
> pgprot_t old_prot, new_prot;
> @@ -404,20 +352,6 @@ bool sev_active(void)
> }
> EXPORT_SYMBOL(sev_active);
>
> -static const struct dma_map_ops sev_dma_ops = {
> - .alloc = sev_alloc,
> - .free = sev_free,
> - .map_page = swiotlb_map_page,
> - .unmap_page = swiotlb_unmap_page,
> - .map_sg = swiotlb_map_sg_attrs,
> - .unmap_sg = swiotlb_unmap_sg_attrs,
> - .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
> - .sync_single_for_device = swiotlb_sync_single_for_device,
> - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
> - .sync_sg_for_device = swiotlb_sync_sg_for_device,
> - .mapping_error = swiotlb_dma_mapping_error,
> -};
> -
> /* Architecture __weak replacement functions */
> void __init mem_encrypt_init(void)
> {
> @@ -428,12 +362,11 @@ void __init mem_encrypt_init(void)
> swiotlb_update_mem_attributes();
>
> /*
> - * With SEV, DMA operations cannot use encryption. New DMA ops
> - * are required in order to mark the DMA areas as decrypted or
> - * to use bounce buffers.
> + * With SEV, DMA operations cannot use encryption, so we need to use
> + * SWIOTLB to bounce-buffer DMA operations.
> */
> if (sev_active())
> - dma_ops = &sev_dma_ops;
> + dma_ops = &swiotlb_dma_ops;
>
> /*
> * With SEV, we need to unroll the rep string I/O instructions.
> diff --git a/lib/dma-direct.c b/lib/dma-direct.c
> index c9e8e21cb334..1277d293d4da 100644
> --- a/lib/dma-direct.c
> +++ b/lib/dma-direct.c
> @@ -9,6 +9,7 @@
> #include <linux/scatterlist.h>
> #include <linux/dma-contiguous.h>
> #include <linux/pfn.h>
> +#include <linux/set_memory.h>
>
> #define DIRECT_MAPPING_ERROR 0
>
> @@ -20,6 +21,14 @@
> #define ARCH_ZONE_DMA_BITS 24
> #endif
>
> +/*
> + * For AMD SEV all DMA must be to unencrypted addresses.
> + */
> +static inline bool force_dma_unencrypted(void)
> +{
> + return sev_active();
> +}
> +
> static bool
> check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
> const char *caller)
> @@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
>
> static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
> {
> - return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
> + dma_addr_t addr = force_dma_unencrypted() ?
> + __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
> + return addr + size - 1 <= dev->coherent_dma_mask;
> }
>
> void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> @@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> int page_order = get_order(size);
> struct page *page = NULL;
> + void *ret;
>
> /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
> if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
> @@ -78,10 +90,15 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>
> if (!page)
> return NULL;
> -
> - *dma_handle = phys_to_dma(dev, page_to_phys(page));
> - memset(page_address(page), 0, size);
> - return page_address(page);
> + ret = page_address(page);
> + if (force_dma_unencrypted()) {
> + set_memory_decrypted((unsigned long)ret, 1 << page_order);
> + *dma_handle = __phys_to_dma(dev, page_to_phys(page));
> + } else {
> + *dma_handle = phys_to_dma(dev, page_to_phys(page));
> + }
> + memset(ret, 0, size);
> + return ret;
> }
>
> /*
> @@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
> dma_addr_t dma_addr, unsigned long attrs)
> {
> unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> + unsigned int page_order = get_order(size);
>
> + if (force_dma_unencrypted())
> + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
> if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
> - free_pages((unsigned long)cpu_addr, get_order(size));
> + free_pages((unsigned long)cpu_addr, page_order);
> }
>
> static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
>
On Mon, 19 Mar 2018, Christoph Hellwig wrote:
> Hi all,
>
> this series switches the x86 code to the dma-direct implementation
> for direct (non-iommu) dma and the generic swiotlb ops. This includes
> getting rid of the special ops for the AMD memory encryption case and
> the STA2x11 SOC. The generic implementations are based on the x86
> code, so they provide the same functionality.
Reviewed-by: Thomas Gleixner <[email protected]>
On 19/03/18 10:38, Christoph Hellwig wrote:
> Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
> the memory encryption mask to the non-prefixed versions. Use the
> __-prefixed versions directly instead of clearing the mask again in
> various places.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/arm/include/asm/dma-direct.h | 4 ++--
As a heads-up, I've just realised there's now a silent (but
build-breaking) conflict with the current arm64 queue brewing here, as
we've unfortunately had to reintroduce ARCH_HAS_PHYS_TO_DMA as a means
of being safe against an ugly architectural corner case - currently
commit 1f85b42a691c ("arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte
cache line size)") in -next.
Robin.
On Mon, Mar 19, 2018 at 03:19:04PM +0000, Robin Murphy wrote:
> As a heads-up, I've just realised there's now a silent (but build-breaking)
> conflict with the current arm64 queue brewing here, as we've unfortunately
> had to reintroduce ARCH_HAS_PHYS_TO_DMA as a means of being safe against an
> ugly architectural corner case - currently commit 1f85b42a691c ("arm64:
> Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size)") in -next.
Please revert that arm64 commit. This condition should be handled
in common code as it is not arm specific. And next time please CC
the iommu list and dma-mapping maintainers before doing such a change.
On Mon, Mar 19, 2018 at 11:38:12AM +0100, Christoph Hellwig wrote:
> Hi all,
>
> this series switches the x86 code the the dma-direct implementation
> for direct (non-iommu) dma and the generic swiotlb ops. This includes
> getting rid of the special ops for the AMD memory encryption case and
> the STA2x11 SOC. The generic implementations are based on the x86
> code, so they provide the same functionality.
I need to test this on my bare-metal and Xen setups - and I lost your
git repo URL - any chance you could point it out to me so I can
kick off a build?
Thanks.
>
> Changes since V2:
> - minor changelog tweaks
> - made is_sta2x11 a plain bool
>
> Changes since V1:
> - fix the length in the set_memory_decrypted call
> - fix the SEV case
On Mon, Mar 19, 2018 at 11:27:37AM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 19, 2018 at 11:38:12AM +0100, Christoph Hellwig wrote:
> > Hi all,
> >
> > this series switches the x86 code the the dma-direct implementation
> > for direct (non-iommu) dma and the generic swiotlb ops. This includes
> > getting rid of the special ops for the AMD memory encryption case and
> > the STA2x11 SOC. The generic implementations are based on the x86
> > code, so they provide the same functionality.
>
> I need to test this on my bare-metal and Xen setups - and I lost your
> git repo URL - any chance you could point it out to me so I can
> kick off a build?
git://git.infradead.org/users/hch/misc.git dma-direct-x86
On 19/03/18 15:24, Christoph Hellwig wrote:
> On Mon, Mar 19, 2018 at 03:19:04PM +0000, Robin Murphy wrote:
>> As a heads-up, I've just realised there's now a silent (but build-breaking)
>> conflict with the current arm64 queue brewing here, as we've unfortunately
>> had to reintroduce ARCH_HAS_PHYS_TO_DMA as a means of being safe against an
>> ugly architectural corner case - currently commit 1f85b42a691c ("arm64:
>> Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size)") in -next.
>
> Please revert that arm64 commit. This condition should be handled
> in common code as it is not arm specific. And next time please CC
> the iommu list and dma-mapping maintainers before doing such a change.
There didn't seem to be enough justification to clutter up core SWIOTLB code
with the ability to force bouncing on a per-device basis, but if you
think there are real potential users out there then fair enough. For
arm64, it's extremely unlikely that anyone will ever build a
sufficiently wacky system to actually hit this code path; we really only
implemented it for peace of mind per the letter of the architecture.
Robin.
On Mon, Mar 19, 2018 at 05:03:43PM +0100, Christoph Hellwig wrote:
> On Mon, Mar 19, 2018 at 03:48:33PM +0000, Will Deacon wrote:
> > Why can't we just resolve the conflict by adding the underscores?
>
> We can solve the conflict easily that way. But that's not the point.
>
> The point is that I've been fighting hard to consolidate dma code
> given that the behavior really is common and not arch specific. And
> this one is another case like that: the fact that the non-coherent
> dma boundary is bigger than the exposed size is something that can
> easily happen elsewhere, so there is no need to duplicate a lot
> of code for that.
Fair enough, although I wouldn't say it's a *lot* of code being duplicated.
Are there other architectures working around this issue too? I couldn't
see anything in the other dma-direct.h headers.
> Nevermind that the commit should at least be three different patches:
>
> (1) revert the broken original commit
> (2) increase the dma min alignment
> (3) put the swiotlb workaround in place
I'd agree with you if this wasn't already queued and sitting in -next.
Reverting what we currently have seems a bit OTT now. Catalin?
Will
On Mon, Mar 19, 2018 at 05:03:43PM +0100, Christoph Hellwig wrote:
> On Mon, Mar 19, 2018 at 03:48:33PM +0000, Will Deacon wrote:
> > Why can't we just resolve the conflict by adding the underscores?
>
> We can solve the conflict easily that way. But that's not the point.
>
> The point is that I've been fighting hard to consolidate dma code
> given that the behavior really is common and not arch specific. And
> this one is another case like that: the fact that the non-coherent
> dma boundary is bigger than the exposed size is something that can
> easily happen elsewhere, so there is no need to duplicate a lot
> of code for that.
I don't particularly like maintaining an arm64-specific dma-direct.h
either but arm64 seems to be the only architecture that needs to
potentially force a bounce when cache_line_size() > ARCH_DMA_MINALIGN
and the device is non-coherent. Note that lib/swiotlb.c doesn't even
deal with non-coherent DMA (e.g. map_sg doesn't have arch callbacks for
cache maintenance), so not disrupting lib/swiotlb.c seems to be the
least intrusive option.
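For reference, the override in question looks roughly like the below (an
approximation of the arm64 dma-direct.h added by commit 1f85b42a691c, not
a verbatim copy; the real patch avoids re-evaluating the granule test on
every call):

static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
	if (!dev->dma_mask)
		return false;

	/*
	 * Force bouncing for non-coherent devices when the cache
	 * writeback granule may exceed ARCH_DMA_MINALIGN. Addresses
	 * already inside the swiotlb pool are exempt, so the bounce
	 * buffer itself still passes the check.
	 */
	if (!is_device_dma_coherent(dev) &&
	    ARCH_DMA_MINALIGN < cache_line_size() &&
	    !is_swiotlb_buffer(dma_to_phys(dev, addr)))
		return false;

	return addr + size - 1 <= *dev->dma_mask;
}

Returning false here steers swiotlb_map_page() towards the bounce path,
which is how per-device bouncing is forced without touching lib/swiotlb.c.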
> Nevermind that the commit should at least be three different patches:
>
> (1) revert the broken original commit
> (2) increase the dma min alignment
Reverting the original commit could, on its own, break an SoC which
expects ARCH_DMA_MINALIGN == 128. So these two should be a single commit
(my patch only reverts the L1_CACHE_BYTES change rather than
ARCH_DMA_MINALIGN, the latter being correct as 128).
Anyway, it's queued already and we try not to rebase the branches we
published. Fix-ups on top are fine though.
> (3) put the swiotlb workaround in place
As I said above, adding a check in swiotlb.c for
!is_device_dma_coherent(dev) && (ARCH_DMA_MINALIGN < cache_line_size())
feels too architecture specific. Adding yet another hook like
arch_dma_capable() doesn't feel right either since we already have the
possibility to override dma_capable() by selecting ARCH_HAS_PHYS_TO_DMA.
The "cleanest" I came up with for swiotlb.c was a new
DMA_ATTR_FORCE_BOUNCE attribute. However, it required more changes to
the arm64 dma-mapping.c than simply implementing an arch-specific
dma_capable().
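To make that concrete, such an attribute could be consumed with a helper
along these lines. DMA_ATTR_FORCE_BOUNCE never went upstream, so both the
flag value and the helper below are hypothetical:

/* hypothetical attribute, taking the next free DMA_ATTR_* bit */
#define DMA_ATTR_FORCE_BOUNCE	(1UL << 10)

static bool swiotlb_needs_bounce(struct device *dev, dma_addr_t dev_addr,
				 size_t size, unsigned long attrs)
{
	if (attrs & DMA_ATTR_FORCE_BOUNCE)	/* arch asked for bouncing */
		return true;
	if (swiotlb_force == SWIOTLB_FORCE)	/* user asked on the cmdline */
		return true;
	return !dma_capable(dev, dev_addr, size);
}

swiotlb_map_page() and swiotlb_map_sg_attrs() would then call this instead
of the bare dma_capable() check, with the arch setting the attribute for
its non-coherent devices.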
--
Catalin
On Mon, Mar 19, 2018 at 03:48:33PM +0000, Will Deacon wrote:
> Why can't we just resolve the conflict by adding the underscores?
We can solve the conflict easily that way. But that's not the point.
The point is that I've been fighting hard to consolidate dma code
given that the behavior really is common and not arch specific. And
this one is another case like that: the fact that the non-coherent
dma boundary is bigger than the exposed size is something that can
easily happen elsewhere, so there is no need to duplicate a lot
of code for that.
Nevermind that the commit should at least be three different patches:
(1) revert the broken original commit
(2) increase the dma min alignment
(3) put the swiotlb workaround in place
On Mon, Mar 19, 2018 at 06:01:41PM +0000, Catalin Marinas wrote:
> I don't particularly like maintaining an arm64-specific dma-direct.h
> either but arm64 seems to be the only architecture that needs to
> potentially force a bounce when cache_line_size() > ARCH_DMA_MINALIGN
> and the device is non-coherent.
mips is another likely candidate, see all the recent drama about
dma_get_cache_alignment(). And I'm also having a major discussion about
even exposing the cache line size architecturally for RISC-V, so chances
are high it'll have to deal with this mess sooner or later as they
probably can't agree on a specific cache line size.
> Note that lib/swiotlb.c doesn't even
> deal with non-coherent DMA (e.g. map_sg doesn't have arch callbacks for
> cache maintenance), so not disrupting lib/swiotlb.c seems to be the
> least intrusive option.
Not yet. I have patches to consolidate the various swiotlb ops
that deal with cache flushing or barriers. I was hoping to get them
in for this merge window, but it probably is too late now given that
I have a few other fires to fight. But they are going to be out
early for the next merge window.
> > Nevermind that the commit should at least be three different patches:
> >
> > (1) revert the broken original commit
> > (2) increase the dma min alignment
>
> Reverting the original commit could, on its own, break an SoC which
> expects ARCH_DMA_MINALIGN == 128. So these two should be a single commit
> (my patch only reverts the L1_CACHE_BYTES change rather than
> ARCH_DMA_MINALIGN, the latter being correct as 128).
It would revert to the state before this commit.
> As I said above, adding a check in swiotlb.c for
> !is_device_dma_coherent(dev) && (ARCH_DMA_MINALIGN < cache_line_size())
> feels too architecture specific.
And what exactly is architecture specific about that? It is a totally
generic concept, which at this point also seems entirely theoretical
based on the previous mail in this thread.
On Mon, Mar 19, 2018 at 03:37:20PM +0000, Robin Murphy wrote:
> On 19/03/18 15:24, Christoph Hellwig wrote:
> >On Mon, Mar 19, 2018 at 03:19:04PM +0000, Robin Murphy wrote:
> >>As a heads-up, I've just realised there's now a silent (but build-breaking)
> >>conflict with the current arm64 queue brewing here, as we've unfortunately
> >>had to reintroduce ARCH_HAS_PHYS_TO_DMA as a means of being safe against an
> >>ugly architectural corner case - currently commit 1f85b42a691c ("arm64:
> >>Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size)") in -next.
> >
> >Please revert that arm64 commit. This condition should be handled
> >in common code as it is not arm specific. And next time please CC
> >the iommu list and dma-mapping maintainers before doing such a change.
>
> There didn't seem to be enough justification to clutter up core SWIOTLB code with
> the ability to force bouncing on a per-device basis, but if you think there
> are real potential users out there then fair enough. For arm64, it's
> extremely unlikely that anyone will ever build a sufficiently wacky system
> to actually hit this code path; we really only implemented it for peace of
> mind per the letter of the architecture.
I'm less sure. We will certainly see systems with large writeback granules;
the real question is whether they will be expected to work with
non-coherent DMA. Having silent data corruption in those situations is just
about the worst possible behaviour, so I'd like to do *something* instead
of that. Falling back to swiotlb bouncing with a taint seems sensible to me.
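A taint along those lines is a one-liner wherever the CPU features get
sanity-checked; a sketch, assuming ARCH_DMA_MINALIGN and an arm64-style
cache_line_size() are visible at the call site (the message text is
illustrative):

	/* flag the out-of-spec combination instead of corrupting data */
	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
		   TAINT_CPU_OUT_OF_SPEC,
		   "ARCH_DMA_MINALIGN smaller than the cache writeback granule (%d < %d)",
		   ARCH_DMA_MINALIGN, cache_line_size());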
Why can't we just resolve the conflict by adding the underscores?
Will
* Christoph Hellwig <[email protected]> wrote:
> On Mon, Mar 19, 2018 at 11:27:37AM -0400, Konrad Rzeszutek Wilk wrote:
> > On Mon, Mar 19, 2018 at 11:38:12AM +0100, Christoph Hellwig wrote:
> > > Hi all,
> > >
> > > this series switches the x86 code the the dma-direct implementation
> > > for direct (non-iommu) dma and the generic swiotlb ops. This includes
> > > getting rid of the special ops for the AMD memory encryption case and
> > > the STA2x11 SOC. The generic implementations are based on the x86
> > > code, so they provide the same functionality.
> >
> > I need to test this on my bare-metal and Xen setups - and I lost your
> > git repo URL - any chance you could point it out to me so I can
> > kick off a build?
>
> git://git.infradead.org/users/hch/misc.git dma-direct-x86
Btw., what's the upstreaming route for these patches?
While it's a multi-arch series it's all pretty x86-heavy as well so we can host it
in -tip (in tip:core/dma or such), but feel free to handle it yourself as well:
Reviewed-by: Ingo Molnar <[email protected]>
Thanks,
Ingo
On Tue, Mar 20, 2018 at 09:37:51AM +0100, Ingo Molnar wrote:
> > git://git.infradead.org/users/hch/misc.git dma-direct-x86
>
> Btw., what's the upstreaming route for these patches?
>
> While it's a multi-arch series it's all pretty x86-heavy as well so we can host it
> in -tip (in tip:core/dma or such), but feel free to handle it yourself as well:
>
> Reviewed-by: Ingo Molnar <[email protected]>
Either way is fine with me. The dma-mapping tree is pretty light this
cycle, so I don't expect any conflicts. If you want it, feel free to grab
it, otherwise I'll queue it up.
* Christoph Hellwig <[email protected]> wrote:
> On Tue, Mar 20, 2018 at 09:37:51AM +0100, Ingo Molnar wrote:
> > > git://git.infradead.org/users/hch/misc.git dma-direct-x86
> >
> > Btw., what's the upstreaming route for these patches?
> >
> > While it's a multi-arch series it's all pretty x86-heavy as well so we can host it
> > in -tip (in tip:core/dma or such), but feel free to handle it yourself as well:
> >
> > Reviewed-by: Ingo Molnar <[email protected]>
>
> Either way is fine with me. The dma-mapping tree is pretty light this
> cycle, so I don't expect any conflicts. If you want it, feel free to grab
> it, otherwise I'll queue it up.
Ok, I picked your series up into tip:core/dma:
- added the newly arrived Tested-by's and Reviewed-by's
- some minor edits to titles/changelogs, no functional changes
will push it all out after testing.
Thanks,
Ingo
Commit-ID: 5927145efd5de71976e62e2822511b13014d7e56
Gitweb: https://git.kernel.org/tip/5927145efd5de71976e62e2822511b13014d7e56
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:13 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:05 +0100
x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk
There were only a few Pentium Pro multiprocessor systems where this
errata applied. They are more than 20 years old now, and we've slowly
dropped places which put the workarounds in and discouraged anyone
from enabling the workaround.
Get rid of it for good.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig.cpu | 13 -------------
arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2 --
arch/x86/include/asm/barrier.h | 30 -----------------------------
arch/x86/include/asm/io.h | 15 ---------------
arch/x86/kernel/pci-nommu.c | 19 ------------------
arch/x86/um/asm/barrier.h | 4 ----
6 files changed, 83 deletions(-)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 65a9a4716e34..f0c5ef578153 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-config X86_PPRO_FENCE
- bool "PentiumPro memory ordering errata workaround"
- depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
- ---help---
- Old PentiumPro multiprocessor systems had errata that could cause
- memory operations to violate the x86 ordering standard in rare cases.
- Enabling this option will attempt to work around some (but not all)
- occurrences of this problem, at the cost of much heavier spinlock and
- memory barrier operations.
-
- If unsure, say n here. Even distro kernels should think twice before
- enabling this: there are few systems, and an unlikely bug.
-
config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
#undef CONFIG_OPTIMIZE_INLINING
#endif
-#undef CONFIG_X86_PPRO_FENCE
-
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae9..042b5e892ed1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
"lfence", X86_FEATURE_LFENCE_RDTSC)
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else
#define dma_rmb() barrier()
-#endif
#define dma_wmb() barrier()
#ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define __smp_wmb() barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v) \
-do { \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- WRITE_ONCE(*p, v); \
-} while (0)
-
-#define __smp_load_acquire(p) \
-({ \
- typeof(*p) ___p1 = READ_ONCE(*p); \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- ___p1; \
-})
-
-#else /* regular x86 TSO memory ordering */
-
#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
@@ -107,8 +79,6 @@ do { \
___p1; \
})
-#endif
-
/* Atomic operations are already serializing on x86 */
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd0..f6e5b9375d8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
*/
#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
-/*
- * Cache management
- *
- * This needed for two cases
- * 1. Out of order aware processors
- * 2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
- asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
#endif /* __KERNEL__ */
extern void native_io_delay(void);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e475c6..ac7ea3a8242f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return NOMMU_MAPPING_ERROR;
- flush_write_buffers();
return bus;
}
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return 0;
s->dma_length = s->length;
}
- flush_write_buffers();
return nents;
}
-static void nommu_sync_single_for_device(struct device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sg, int nelems,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
.free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
- .sync_single_for_device = nommu_sync_single_for_device,
- .sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
.mapping_error = nommu_mapping_error,
.dma_supported = x86_dma_supported,
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d73400ea29..f31e5d903161 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
#define dma_rmb() barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
#define dma_wmb() barrier()
#include <asm-generic/barrier.h>
On Tue, Mar 20, 2018 at 04:04:49AM -0700, tip-bot for Christoph Hellwig wrote:
> x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk
>
> There were only a few Pentium Pro multiprocessor systems where this
> errata applied. They are more than 20 years old now, and we've slowly
> dropped places which put the workarounds in and discouraged anyone
> from enabling the workaround.
>
> Get rid of it for good.
Hoorah!! x86 is back to pure TSO.
On Tue, Mar 20, 2018 at 11:25:21AM +0000, Konrad Rzeszutek Wilk wrote:
> Are you testing with swiotlb=force?
I've done swiotlb=force testing with earlier versions, and otherwise
relied on a VM with more than 4GB of memory and emulated devices
without 64-bit capability.
On Mon, Mar 19, 2018 at 08:49:30PM +0100, Christoph Hellwig wrote:
> On Mon, Mar 19, 2018 at 06:01:41PM +0000, Catalin Marinas wrote:
> > I don't particularly like maintaining an arm64-specific dma-direct.h
> > either but arm64 seems to be the only architecture that needs to
> > potentially force a bounce when cache_line_size() > ARCH_DMA_MINALIGN
> > and the device is non-coherent.
>
> mips is another likely candidate, see all the recent drama about
> dma_get_cache_alignment(). And I'm also having a major discussion about
> even exposing the cache line size architecturally for RISC-V, so chances
> are high it'll have to deal with this mess sooner or later as they
> probably can't agree on a specific cache line size.
On Arm, the cache line size varies between 32 and 128 bytes on publicly
available hardware (and I wouldn't exclude higher numbers at some
point). In addition, the cache line size has a different meaning in the
DMA context; we call it "cache writeback granule" on Arm, which is
greater than or equal to the minimum cache line size.
So the aim is to have L1_CACHE_BYTES small enough for acceptable
performance numbers and ARCH_DMA_MINALIGN the maximum from a correctness
perspective (the latter is defined by some larger cache lines in L2/L3).
To make things worse, there is no clear definition in the generic kernel
on what cache_line_size() means and the default definition returns
L1_CACHE_BYTES. On arm64, we define it to the hardware's cache
writeback granule (CWG), if available, with a fallback on
ARCH_DMA_MINALIGN. The network layer, OTOH, seems to assume that
SMP_CACHE_BYTES is sufficient for DMA alignment (L1_CACHE_BYTES in
arm64's case).
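The arm64 definition being described boils down to the following (a
simplified sketch of arch/arm64/include/asm/cache.h; CTR_EL0.CWG encodes
the granule as log2 in 4-byte words, with 0 meaning the CPU does not
report it):

static inline int cache_line_size(void)
{
	u32 cwg = cache_type_cwg();	/* CWG field of CTR_EL0 */

	/* the writeback granule if reported, the worst case otherwise */
	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}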
> > As I said above, adding a check in swiotlb.c for
> > !is_device_dma_coherent(dev) && (ARCH_DMA_MINALIGN < cache_line_size())
> > feels too architecture specific.
>
> And what exactly is architecture specific about that? It is a totally
> generic concept, which at this point also seems entirely theoretical
> based on the previous mail in this thread.
The concept may be generic but the kernel macros/functions used here
aren't. is_device_dma_coherent() is only defined on arm and arm64. The
relation between ARCH_DMA_MINALIGN, L1_CACHE_BYTES and cache_line_size()
seems to be pretty ad-hoc. ARCH_DMA_MINALIGN is also only defined for
some architectures and, while there is dma_get_cache_alignment() which
returns this constant, it doesn't seem to be used much.
I'm all for fixing this in a generic way but I think we first need
swiotlb.c to become aware of non-cache-coherent DMA devices.
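For what it's worth, the shape such awareness could take is small; a
hypothetical sketch, where both the coherence predicate and the arch
cache-maintenance hook are names that generic code does not provide at
this point:

/*
 * Hypothetical: after bouncing a buffer towards the device, let the
 * architecture do its cache maintenance. Neither helper below is
 * currently available to lib/swiotlb.c.
 */
static void swiotlb_arch_sync_for_device(struct device *dev,
					 phys_addr_t paddr, size_t size,
					 enum dma_data_direction dir)
{
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}

map_page/map_sg would call this after copying into the bounce buffer (and
a matching _for_cpu variant before copying out), mirroring what the
arm/arm64 dma-mapping code does today.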
--
Catalin
On Tue, Mar 20, 2018 at 04:16:00PM +0100, Christoph Hellwig wrote:
> On Tue, Mar 20, 2018 at 11:25:21AM +0000, Konrad Rzeszutek Wilk wrote:
> > Are you testing with swiotlb=force?
>
> I've done swiotlb=force testing with earlier versions, and otherwise
> relied on a VM with more than 4GB of memory and emulated devices
> without 64-bit capability.
Awesome! I also did my testing, so all good. Ingo, if you want, you
can add 'Acked-by: Konrad Rzeszutek Wilk <[email protected]>'
on the patches that don't have my tag.
Christoph, thank you for cleaning up this swamp!
Commit-ID: fec777c385b6376048fc4b08f039366545b335cd
Gitweb: https://git.kernel.org/tip/fec777c385b6376048fc4b08f039366545b335cd
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:15 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:56 +0100
x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y)
The generic DMA-direct (CONFIG_DMA_DIRECT_OPS=y) implementation is now
functionally equivalent to the x86 nommu dma_map implementation, so
switch over to using it.
That includes switching the various IOMMU drivers from x86_dma_supported
to dma_direct_supported, which provides the same
functionality.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/dma-mapping.h | 8 -----
arch/x86/include/asm/iommu.h | 3 --
arch/x86/kernel/Makefile | 2 +-
arch/x86/kernel/amd_gart_64.c | 7 ++--
arch/x86/kernel/pci-calgary_64.c | 3 +-
arch/x86/kernel/pci-dma.c | 66 +-------------------------------------
arch/x86/kernel/pci-swiotlb.c | 5 ++-
arch/x86/pci/sta2x11-fixup.c | 2 +-
drivers/iommu/amd_iommu.c | 7 ++--
drivers/iommu/intel-iommu.c | 3 +-
11 files changed, 17 insertions(+), 90 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 18233e459bff..7dc347217d3a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
+ select DMA_DIRECT_OPS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 545bf3721bc0..df9816b385eb 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask);
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d92eb40..baedab8ac538 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
-extern const struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
-int x86_dma_supported(struct device *dev, u64 mask);
-
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..2e8c8a09ecab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y += alternative.o i8253.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486cb06ab..52e3abcf3e70 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
}
__free_pages(page, get_order(size));
} else
- return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
- attrs);
+ return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
return NULL;
}
@@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = {
.alloc = gart_alloc_coherent,
.free = gart_free_coherent,
.mapping_error = gart_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f21815..5647853053bd 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/bitmap.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = {
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
.mapping_error = calgary_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b59820872ec7..db0b88ea8d1b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -18,7 +18,7 @@
static int forbid_dac __read_mostly;
-const struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &dma_direct_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void)
}
}
}
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
-{
- struct page *page;
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- dma_addr_t addr;
-
-again:
- page = NULL;
- /* CMA can be used only in the context which permits sleeping */
- if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size),
- flag);
- if (page) {
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dev->coherent_dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
- }
- /* fallback */
- if (!page)
- page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
- if (!page)
- return NULL;
-
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dev->coherent_dma_mask) {
- __free_pages(page, get_order(size));
-
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
- !(flag & GFP_DMA)) {
- flag = (flag & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- return NULL;
- }
- memset(page_address(page), 0, size);
- *dma_addr = addr;
- return page_address(page);
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
-{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct page *page = virt_to_page(vaddr);
-
- if (!dma_release_from_contiguous(dev, page, count))
- free_pages((unsigned long)vaddr, get_order(size));
-}
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
@@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(arch_dma_supported);
-int x86_dma_supported(struct device *dev, u64 mask)
-{
- /* Copied from i386. Doesn't make much sense, because it will
- only work for pci_alloc_coherent.
- The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_BIT_MASK(24))
- return 0;
- return 1;
-}
-
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f34251..bcb6a9bf64ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
*/
flags |= __GFP_NOWARN;
- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
- attrs);
+ vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
if (vaddr)
return vaddr;
@@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
swiotlb_free_coherent(dev, size, vaddr, dma_addr);
else
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static const struct dma_map_ops x86_swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 75577c1490c4..6c712fe11bdc 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = {
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
/* At setup time, we use our own ops if the device is a ConneXt one */
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fdeb773..0bf19423b588 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -28,6 +28,7 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
#include <linux/delay.h>
@@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev)
dev_name(dev));
iommu_ignore_device(dev);
- dev->dma_ops = &nommu_dma_ops;
+ dev->dma_ops = &dma_direct_ops;
goto out;
}
init_iommu_group(dev);
@@ -2680,7 +2681,7 @@ free_mem:
*/
static int amd_iommu_dma_supported(struct device *dev, u64 mask)
{
- if (!x86_dma_supported(dev, mask))
+ if (!dma_direct_supported(dev, mask))
return 0;
return check_device(dev);
}
@@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void)
* continue to be SWIOTLB.
*/
if (!swiotlb)
- dma_ops = &nommu_dma_ops;
+ dma_ops = &dma_direct_ops;
if (amd_iommu_unmap_flush)
pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 582fd01cb7d1..fd899b2a12bb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -45,6 +45,7 @@
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
@@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = {
.unmap_page = intel_unmap_page,
.mapping_error = intel_mapping_error,
#ifdef CONFIG_X86
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
#endif
};
Commit-ID: 6e4bf586778315b3fc53b728c53eefc247cfc3ff
Gitweb: https://git.kernel.org/tip/6e4bf586778315b3fc53b728c53eefc247cfc3ff
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:16 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:57 +0100
x86/dma: Use generic swiotlb_ops
The generic swiotlb DMA ops were based on the x86 ones and provide
equivalent functionality, so use them.
Also fix the sta2x11 case. For that SOC the DMA map ops need an
additional physical to DMA address translation. For swiotlb buffers
that is done through the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that function
returns a direct allocation and not a swiotlb buffer. With the
generic swiotlb and DMA-direct code phys_to_dma is now always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to DMA address translation
is needed.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/device.h | 3 +++
arch/x86/include/asm/swiotlb.h | 8 -------
arch/x86/kernel/pci-swiotlb.c | 47 +-----------------------------------------
arch/x86/pci/sta2x11-fixup.c | 46 +++++------------------------------------
4 files changed, 9 insertions(+), 95 deletions(-)
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..a8f6c809d9b1 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
+#ifdef CONFIG_STA2X11
+ bool is_sta2x11;
+#endif
};
#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
{
}
#endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
int swiotlb __read_mostly;
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- /*
- * Don't print a warning when the first allocation attempt fails.
- * swiotlb_alloc_coherent() will print a warning when the DMA
- * memory allocation ultimately failed.
- */
- flags |= __GFP_NOWARN;
-
- vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
- if (vaddr)
- return vaddr;
-
- return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
- else
- dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc = x86_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .dma_supported = NULL,
-};
-
/*
* pci_swiotlb_detect_override - set swiotlb to 1 if necessary
*
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
{
if (swiotlb) {
swiotlb_init(0);
- dma_ops = &x86_swiotlb_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
}
}
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
return p;
}
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- * returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
- *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
- return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
- .alloc = sta2x11_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = dma_direct_supported,
-};
-
/* At setup time, we use our own ops if the device is a ConneXt one */
static void sta2x11_setup_pdev(struct pci_dev *pdev)
{
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
return;
pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
- pdev->dev.dma_ops = &sta2x11_dma_ops;
+ pdev->dev.dma_ops = &swiotlb_dma_ops;
+ pdev->dev.archdata.is_sta2x11 = true;
/* We must enable all devices as master, for audio DMA to work */
pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
struct sta2x11_mapping *map;
- if (dev->dma_ops != &sta2x11_dma_ops) {
+ if (!dev->archdata.is_sta2x11) {
if (!dev->dma_mask)
return false;
return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
*/
dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return paddr;
return p2a(paddr, to_pci_dev(dev));
}
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
*/
phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return daddr;
return a2p(daddr, to_pci_dev(dev));
}
Commit-ID: 038d07a283d62336b32cc23b62aecdf9418cfc11
Gitweb: https://git.kernel.org/tip/038d07a283d62336b32cc23b62aecdf9418cfc11
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:14 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:56 +0100
x86/dma: Remove dma_alloc_coherent_mask()
These days all devices (including the ISA fallback device) have a coherent
DMA mask set, so remove the workaround.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/dma-mapping.h | 18 ++----------------
arch/x86/kernel/pci-dma.c | 10 ++++------
arch/x86/mm/mem_encrypt.c | 4 +---
drivers/xen/swiotlb-xen.c | 16 +---------------
4 files changed, 8 insertions(+), 40 deletions(-)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6277c83c0eb1..545bf3721bc0 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -44,26 +44,12 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
unsigned long attrs);
-static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
- gfp_t gfp)
-{
- unsigned long dma_mask = 0;
-
- dma_mask = dev->coherent_dma_mask;
- if (!dma_mask)
- dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
- return dma_mask;
-}
-
static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
- unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
-
- if (dma_mask <= DMA_BIT_MASK(24))
+ if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
gfp |= GFP_DMA;
#ifdef CONFIG_X86_64
- if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
+ if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
gfp |= GFP_DMA32;
#endif
return gfp;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df7ab02f959f..b59820872ec7 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -80,13 +80,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
unsigned long attrs)
{
- unsigned long dma_mask;
struct page *page;
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
dma_addr_t addr;
- dma_mask = dma_alloc_coherent_mask(dev, flag);
-
again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
@@ -95,7 +92,7 @@ again:
flag);
if (page) {
addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
+ if (addr + size > dev->coherent_dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
}
@@ -108,10 +105,11 @@ again:
return NULL;
addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
+ if (addr + size > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
- if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+ if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+ !(flag & GFP_DMA)) {
flag = (flag & ~GFP_DMA32) | GFP_DMA;
goto again;
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3a1b5fe4c2ca..f6cd84beb610 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -198,12 +198,10 @@ void __init sme_early_init(void)
static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
- unsigned long dma_mask;
unsigned int order;
struct page *page;
void *vaddr = NULL;
- dma_mask = dma_alloc_coherent_mask(dev, gfp);
order = get_order(size);
/*
@@ -221,7 +219,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* mask with it already cleared.
*/
addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
- if ((addr + size) > dma_mask) {
+ if ((addr + size) > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
} else {
vaddr = page_address(page);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 5bb72d3f8337..e1c60899fdbc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -53,20 +53,6 @@
* API.
*/
-#ifndef CONFIG_X86
-static unsigned long dma_alloc_coherent_mask(struct device *dev,
- gfp_t gfp)
-{
- unsigned long dma_mask = 0;
-
- dma_mask = dev->coherent_dma_mask;
- if (!dma_mask)
- dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
- return dma_mask;
-}
-#endif
-
#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0)
static char *xen_io_tlb_start, *xen_io_tlb_end;
@@ -328,7 +314,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return ret;
if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+ dma_mask = hwdev->coherent_dma_mask;
/* At this point dma_handle is the physical address, next we are
* going to set it to the machine address.
Commit-ID: f3c39d51043dcee5dd4d51dffcfe9ce01a263582
Gitweb: https://git.kernel.org/tip/f3c39d51043dcee5dd4d51dffcfe9ce01a263582
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:17 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:57 +0100
x86/dma/amd_gart: Look at dev->coherent_dma_mask instead of GFP_DMA
We want to phase out looking at the magic GFP_DMA flag in the DMA mapping
routines, so switch the gart driver to use the dev->coherent_dma_mask
instead, which is used to select the GFP_DMA flag in the caller.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/amd_gart_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 52e3abcf3e70..79ac6caaaabb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
unsigned long align_mask;
struct page *page;
- if (force_iommu && !(flag & GFP_DMA)) {
+ if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
page = alloc_pages(flag | __GFP_ZERO, get_order(size));
if (!page)
Commit-ID: 51c7eeba7975c1d2a02eefd00ece6de25176f5f3
Gitweb: https://git.kernel.org/tip/51c7eeba7975c1d2a02eefd00ece6de25176f5f3
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:18 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:57 +0100
x86/dma/amd_gart: Use dma_direct_{alloc,free}()
This gains support for CMA allocations for the force_iommu case, and
cleans up the code a bit.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/amd_gart_64.c | 36 ++++++++++++++----------------------
1 file changed, 14 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 79ac6caaaabb..f299d8a479bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,29 +480,21 @@ static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
gfp_t flag, unsigned long attrs)
{
- dma_addr_t paddr;
- unsigned long align_mask;
- struct page *page;
-
- if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- align_mask = (1UL << get_order(size)) - 1;
- paddr = dma_map_area(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL, align_mask);
-
- flush_gart();
- if (paddr != bad_dma_addr) {
- *dma_addr = paddr;
- return page_address(page);
- }
- __free_pages(page, get_order(size));
- } else
- return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ void *vaddr;
+
+ vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ if (!vaddr ||
+ !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+ return vaddr;
+ *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+ DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+ flush_gart();
+ if (unlikely(*dma_addr == bad_dma_addr))
+ goto out_free;
+ return vaddr;
+out_free:
+ dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
Commit-ID: d657c5c73ca987214a6f9436e435b34fc60f332a
Gitweb: https://git.kernel.org/tip/d657c5c73ca987214a6f9436e435b34fc60f332a
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:20 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:58 +0100
iommu/intel-iommu: Enable CONFIG_DMA_DIRECT_OPS=y and clean up intel_{alloc,free}_coherent()
Use the dma_direct_*() helpers and clean up the code flow.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/iommu/Kconfig | 1 +
drivers/iommu/intel-iommu.c | 62 ++++++++++++---------------------------------
2 files changed, 17 insertions(+), 46 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc7c1914645d..df171cb85822 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -143,6 +143,7 @@ config DMAR_TABLE
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+ select DMA_DIRECT_OPS
select IOMMU_API
select IOMMU_IOVA
select DMAR_TABLE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fd899b2a12bb..24d1b1b42013 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
@@ -3708,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
- struct page *page = NULL;
- int order;
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- if (!iommu_no_mapping(dev))
- flags &= ~(GFP_DMA | GFP_DMA32);
- else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
- flags |= GFP_DMA;
- else
- flags |= GFP_DMA32;
- }
+ void *vaddr;
- if (gfpflags_allow_blocking(flags)) {
- unsigned int count = size >> PAGE_SHIFT;
+ vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+ if (iommu_no_mapping(dev) || !vaddr)
+ return vaddr;
- page = dma_alloc_from_contiguous(dev, count, order, flags);
- if (page && iommu_no_mapping(dev) &&
- page_to_phys(page) + size > dev->coherent_dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
-
- if (!page)
- page = alloc_pages(flags, order);
- if (!page)
- return NULL;
- memset(page_address(page), 0, size);
-
- *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL,
- dev->coherent_dma_mask);
- if (*dma_handle)
- return page_address(page);
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, order);
+ *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
+ PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
+ dev->coherent_dma_mask);
+ if (!*dma_handle)
+ goto out_free_pages;
+ return vaddr;
+out_free_pages:
+ dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
return NULL;
}
static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- int order;
- struct page *page = virt_to_page(vaddr);
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- intel_unmap(dev, dma_handle, size);
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, order);
+ if (!iommu_no_mapping(dev))
+ intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
+ dma_direct_free(dev, size, vaddr, dma_handle, attrs);
}
static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
Commit-ID: 178c5682447ac0e315f0f3e27664fd4e0d2721cc
Gitweb: https://git.kernel.org/tip/178c5682447ac0e315f0f3e27664fd4e0d2721cc
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:21 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:58 +0100
x86/dma: Remove dma_alloc_coherent_gfp_flags()
All dma_ops implementations used on x86 now take care of setting their own
required GFP_ masks for the allocation. And given that the common code
now clears harmful flags itself, we can stop clearing them in all
the IOMMU implementations as well.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/dma-mapping.h | 11 -----------
arch/x86/kernel/pci-calgary_64.c | 2 --
arch/x86/kernel/pci-dma.c | 2 --
arch/x86/mm/mem_encrypt.c | 7 -------
4 files changed, 22 deletions(-)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index df9816b385eb..89ce4bfd241f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
- if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
- gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
- if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
- gfp |= GFP_DMA32;
-#endif
- return gfp;
-}
-
#endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5647853053bd..bbfc8b1e9104 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -446,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
if (!ret)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index db0b88ea8d1b..14437116ffea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,8 +82,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
if (!*dev)
*dev = &x86_dma_fallback_dev;
- *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index f6cd84beb610..1217a4fab915 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -203,13 +203,6 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
void *vaddr = NULL;
order = get_order(size);
-
- /*
- * Memory will be memset to zero after marking decrypted, so don't
- * bother clearing it before.
- */
- gfp &= ~__GFP_ZERO;
-
page = alloc_pages_node(dev_to_node(dev), gfp, order);
if (page) {
dma_addr_t addr;
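The removed helper encoded a simple policy: a coherent mask of 24 bits or less needs ZONE_DMA, and on 64-bit kernels a mask of 32 bits or less needs ZONE_DMA32. The same policy now lives in the common allocator. A stand-alone model of the decision, using plain integers instead of real gfp_t flags:

#include <stdio.h>
#include <stdint.h>

#define MOCK_GFP_DMA	0x01u
#define MOCK_GFP_DMA32	0x02u

/* DMA_BIT_MASK(n): lowest n bits set, as in the kernel macro. */
#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* Model of the zone selection that moved into dma_direct_alloc(). */
static unsigned int gfp_for_coherent_mask(uint64_t mask)
{
	unsigned int gfp = 0;

	if (mask <= DMA_BIT_MASK(24))
		gfp |= MOCK_GFP_DMA;
	else if (mask <= DMA_BIT_MASK(32))
		gfp |= MOCK_GFP_DMA32;
	return gfp;
}

int main(void)
{
	printf("24-bit mask -> %#x\n", gfp_for_coherent_mask(DMA_BIT_MASK(24)));
	printf("32-bit mask -> %#x\n", gfp_for_coherent_mask(DMA_BIT_MASK(32)));
	printf("64-bit mask -> %#x\n", gfp_for_coherent_mask(DMA_BIT_MASK(64)));
	return 0;
}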
Commit-ID: b7fa07460b0f0e9fbe6d9319a0864c145bd59bcb
Gitweb: https://git.kernel.org/tip/b7fa07460b0f0e9fbe6d9319a0864c145bd59bcb
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:22 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:58 +0100
set_memory.h: Provide set_memory_{en,de}crypted() stubs
... to make these APIs more universally available.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/set_memory.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e5140648f638..da5178216da5 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
+#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
+static inline int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ return 0;
+}
+
+static inline int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ return 0;
+}
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
#endif /* _LINUX_SET_MEMORY_H_ */
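The point of the stubs is that callers no longer need an #ifdef around the encryption calls: without CONFIG_ARCH_HAS_MEM_ENCRYPT the functions compile to no-ops that report success. A sketch of the resulting caller pattern, with the stub copied in so it builds stand-alone:

#include <stdio.h>
#include <string.h>

/* Same shape as the !CONFIG_ARCH_HAS_MEM_ENCRYPT stub above. */
static inline int set_memory_decrypted(unsigned long addr, int numpages)
{
	return 0;
}

static char buffer[4096];

int main(void)
{
	/*
	 * Unconditional call: on architectures without memory encryption
	 * this is a no-op that returns 0, so no #ifdef is needed.
	 */
	if (set_memory_decrypted((unsigned long)buffer, 1))
		return 1;
	memset(buffer, 0, sizeof(buffer));
	printf("buffer decrypted (or no-op) and cleared\n");
	return 0;
}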
Commit-ID: b468620f2a1dfdcfddfd6fa54367b8bcc1b51248
Gitweb: https://git.kernel.org/tip/b468620f2a1dfdcfddfd6fa54367b8bcc1b51248
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:19 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:58 +0100
iommu/amd_iommu: Use CONFIG_DMA_DIRECT_OPS=y and dma_direct_{alloc,free}()
This cleans up the code a lot by removing duplicate logic.
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Joerg Roedel <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Joerg Roedel <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/iommu/Kconfig | 1 +
drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
2 files changed, 22 insertions(+), 47 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..dc7c1914645d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,6 +107,7 @@ config IOMMU_PGTABLES_L2
# AMD IOMMU support
config AMD_IOMMU
bool "AMD IOMMU support"
+ select DMA_DIRECT_OPS
select SWIOTLB
select PCI_MSI
select PCI_ATS
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0bf19423b588..83819d0cbf90 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2600,51 +2600,32 @@ static void *alloc_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
u64 dma_mask = dev->coherent_dma_mask;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL) {
- page = alloc_pages(flag, get_order(size));
- *dma_addr = page_to_phys(page);
- return page_address(page);
- } else if (IS_ERR(domain))
- return NULL;
+ struct protection_domain *domain = get_domain(dev);
+ bool is_direct = false;
+ void *virt_addr;
- dma_dom = to_dma_ops_domain(domain);
- size = PAGE_ALIGN(size);
- dma_mask = dev->coherent_dma_mask;
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- flag |= __GFP_ZERO;
-
- page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
- if (!page) {
- if (!gfpflags_allow_blocking(flag))
- return NULL;
-
- page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), flag);
- if (!page)
+ if (IS_ERR(domain)) {
+ if (PTR_ERR(domain) != -EINVAL)
return NULL;
+ is_direct = true;
}
+ virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ if (!virt_addr || is_direct)
+ return virt_addr;
+
if (!dma_mask)
dma_mask = *dev->dma_mask;
- *dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
- size, DMA_BIDIRECTIONAL, dma_mask);
-
+ *dma_addr = __map_single(dev, to_dma_ops_domain(domain),
+ virt_to_phys(virt_addr), PAGE_ALIGN(size),
+ DMA_BIDIRECTIONAL, dma_mask);
if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
goto out_free;
-
- return page_address(page);
+ return virt_addr;
out_free:
-
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-
+ dma_direct_free(dev, size, virt_addr, *dma_addr, attrs);
return NULL;
}
@@ -2655,24 +2636,17 @@ static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr,
unsigned long attrs)
{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
+ struct protection_domain *domain = get_domain(dev);
- page = virt_to_page(virt_addr);
size = PAGE_ALIGN(size);
- domain = get_domain(dev);
- if (IS_ERR(domain))
- goto free_mem;
+ if (!IS_ERR(domain)) {
+ struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain);
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+ __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+ }
-free_mem:
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
+ dma_direct_free(dev, size, virt_addr, dma_addr, attrs);
}
/*
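get_domain() uses the kernel's ERR_PTR convention, and the rewritten alloc_coherent() turns it into a three-way branch: -EINVAL means "no translation for this device, direct DMA only", any other error aborts, and a valid pointer selects the IOMMU path. A minimal re-implementation of the convention (the real one lives in include/linux/err.h) to show that branch:

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

/* Simplified ERR_PTR/PTR_ERR/IS_ERR, mirroring include/linux/err.h. */
#define MAX_ERRNO	4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline bool IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct domain { int id; };

/* Mocked get_domain(); change the return to try the other branches. */
static struct domain *get_domain(void)
{
	return ERR_PTR(-EINVAL);	/* identity-mapped device */
}

int main(void)
{
	struct domain *domain = get_domain();
	bool is_direct = false;

	if (IS_ERR(domain)) {
		if (PTR_ERR(domain) != -EINVAL)
			return 1;	/* hard error: fail the allocation */
		is_direct = true;	/* no IOMMU domain: direct DMA only */
	}
	printf("is_direct=%d\n", is_direct);
	return 0;
}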
Commit-ID: e7de6c7cc207be78369d45fb833d7d53aeda47f8
Gitweb: https://git.kernel.org/tip/e7de6c7cc207be78369d45fb833d7d53aeda47f8
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:23 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:58 +0100
dma/swiotlb: Remove swiotlb_set_mem_attributes()
Now that set_memory_decrypted() is always available, we can just call
it directly.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/mem_encrypt.h | 2 --
arch/x86/mm/mem_encrypt.c | 8 --------
lib/swiotlb.c | 12 ++++++------
3 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 8fe61ad21047..c0643831706e 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -49,8 +49,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
bool sme_active(void);
bool sev_active(void);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1217a4fab915..d243e8d80d89 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -441,11 +441,3 @@ void __init mem_encrypt_init(void)
: "Secure Memory Encryption (SME)");
}
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
- WARN(PAGE_ALIGN(size) != size,
- "size is not page-aligned (%#lx)\n", size);
-
- /* Make the SWIOTLB buffer area decrypted */
- set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..005d1d87bb2e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
/* For swiotlb, clear memory encryption mask from dma addresses */
static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
phys_addr_t address)
@@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(io_tlb_start);
bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
vaddr = phys_to_virt(io_tlb_overflow_buffer);
bytes = PAGE_ALIGN(io_tlb_overflow);
- swiotlb_set_mem_attributes(vaddr, bytes);
+ set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
}
@@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_start = virt_to_phys(tlb);
io_tlb_end = io_tlb_start + bytes;
- swiotlb_set_mem_attributes(tlb, bytes);
+ set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
/*
@@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!v_overflow_buffer)
goto cleanup2;
- swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+ set_memory_decrypted((unsigned long)v_overflow_buffer,
+ io_tlb_overflow >> PAGE_SHIFT);
memset(v_overflow_buffer, 0, io_tlb_overflow);
io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
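One detail worth noting in the conversion: swiotlb_set_mem_attributes() took a size in bytes, while set_memory_decrypted() takes a page count, so each call site now page-aligns the length and shifts by PAGE_SHIFT. The arithmetic, modeled outside the kernel with the usual 4 KiB page constants:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long bytes = 5000;	/* deliberately not page aligned */
	unsigned long npages = PAGE_ALIGN(bytes) >> PAGE_SHIFT;

	/* 5000 bytes round up to 8192, i.e. two 4 KiB pages. */
	printf("%lu bytes -> %lu pages\n", bytes, npages);
	return 0;
}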
Commit-ID: b6e05477c10c12e36141558fc14f04b00ea634d4
Gitweb: https://git.kernel.org/tip/b6e05477c10c12e36141558fc14f04b00ea634d4
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:24 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:59 +0100
dma/direct: Handle the memory encryption bit in common code
Give the basic phys_to_dma() and dma_to_phys() helpers a __-prefix and add
the memory encryption mask to the non-prefixed versions. Use the
__-prefixed versions directly instead of clearing the mask again in
various places.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/arm/include/asm/dma-direct.h | 4 ++--
arch/mips/cavium-octeon/dma-octeon.c | 10 ++++-----
.../include/asm/mach-cavium-octeon/dma-coherence.h | 4 ++--
.../include/asm/mach-loongson64/dma-coherence.h | 10 ++++-----
arch/mips/loongson64/common/dma-swiotlb.c | 4 ++--
arch/powerpc/include/asm/dma-direct.h | 4 ++--
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/dma-direct.h | 25 ++--------------------
arch/x86/mm/mem_encrypt.c | 2 +-
arch/x86/pci/sta2x11-fixup.c | 6 +++---
include/linux/dma-direct.h | 21 ++++++++++++++++--
lib/swiotlb.c | 25 ++++++++--------------
12 files changed, 53 insertions(+), 64 deletions(-)
diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index 5b0a8a421894..b67e5fc1fe43 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,13 +2,13 @@
#ifndef ASM_ARM_DMA_DIRECT_H
#define ASM_ARM_DMA_DIRECT_H 1
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
unsigned int offset = paddr & ~PAGE_MASK;
return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
unsigned int offset = dev_addr & ~PAGE_MASK;
return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c7bb8a407041..7b335ab21697 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -10,7 +10,7 @@
* IP32 changes by Ilya.
* Copyright (C) 2010 Cavium Networks, Inc.
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/bootmem.h>
#include <linux/export.h>
@@ -182,7 +182,7 @@ struct octeon_dma_map_ops {
phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
};
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
struct octeon_dma_map_ops,
@@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
return ops->phys_to_dma(dev, paddr);
}
-EXPORT_SYMBOL(phys_to_dma);
+EXPORT_SYMBOL(__phys_to_dma);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
struct octeon_dma_map_ops,
@@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
return ops->dma_to_phys(dev, daddr);
}
-EXPORT_SYMBOL(dma_to_phys);
+EXPORT_SYMBOL(__dma_to_phys);
static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
.dma_map_ops = {
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index 138edf6b5b48..6eb1ee548b11 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
struct dma_map_ops;
extern const struct dma_map_ops *octeon_pci_dma_map_ops;
diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
index b1b575f5c6c1..64fc44dec0a8 100644
--- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
@@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
size_t size)
{
#ifdef CONFIG_CPU_LOONGSON3
- return phys_to_dma(dev, virt_to_phys(addr));
+ return __phys_to_dma(dev, virt_to_phys(addr));
#else
return virt_to_phys(addr) | 0x80000000;
#endif
@@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
struct page *page)
{
#ifdef CONFIG_CPU_LOONGSON3
- return phys_to_dma(dev, page_to_phys(page));
+ return __phys_to_dma(dev, page_to_phys(page));
#else
return page_to_phys(page) | 0x80000000;
#endif
@@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
dma_addr_t dma_addr)
{
#if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT)
- return dma_to_phys(dev, dma_addr);
+ return __dma_to_phys(dev, dma_addr);
#elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff);
#else
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 7bbcf89475f3..6a739f8ae110 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask)
return swiotlb_dma_supported(dev, mask);
}
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
long nid;
#ifdef CONFIG_PHYS48_TO_HT40
@@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
return paddr;
}
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
long nid;
#ifdef CONFIG_PHYS48_TO_HT40
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a5b59c765426..7702875aabb7 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
}
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
return paddr + get_dma_offset(dev);
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
return daddr - get_dma_offset(dev);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7dc347217d3a..5b4899de076f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
- select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
@@ -692,6 +691,7 @@ config X86_SUPPORTS_MEMORY_FAILURE
config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
+ select ARCH_HAS_PHYS_TO_DMA
select X86_DEV_DMA_OPS
select X86_DMA_REMAP
select SWIOTLB
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
index 1295bc622ebe..1a19251eaac9 100644
--- a/arch/x86/include/asm/dma-direct.h
+++ b/arch/x86/include/asm/dma-direct.h
@@ -2,29 +2,8 @@
#ifndef ASM_X86_DMA_DIRECT_H
#define ASM_X86_DMA_DIRECT_H 1
-#include <linux/mem_encrypt.h>
-
-#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-#else
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
- if (!dev->dma_mask)
- return 0;
-
- return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return __sme_set(paddr);
-}
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
- return __sme_clr(daddr);
-}
-#endif /* CONFIG_X86_DMA_REMAP */
#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d243e8d80d89..1b396422d26f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -211,7 +211,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* Since we will be clearing the encryption bit, check the
* mask with it already cleared.
*/
- addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
+ addr = __phys_to_dma(dev, page_to_phys(page));
if ((addr + size) > dev->coherent_dma_mask) {
__free_pages(page, get_order(size));
} else {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index eac58e03f43c..7a5bafb76d77 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -207,11 +207,11 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
/**
- * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
+ * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
* @dev: device for a PCI device
* @paddr: Physical address
*/
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
if (!dev->archdata.is_sta2x11)
return paddr;
@@ -223,7 +223,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
* @dev: device for a PCI device
* @daddr: STA2x11 AMBA DMA address
*/
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
if (!dev->archdata.is_sta2x11)
return daddr;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bcdb1a3e4b1f..53ad6a47f513 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -3,18 +3,19 @@
#define _LINUX_DMA_DIRECT_H 1
#include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
#include <asm/dma-direct.h>
#else
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
dma_addr_t dev_addr = (dma_addr_t)paddr;
return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
}
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
{
phys_addr_t paddr = (phys_addr_t)dev_addr;
@@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
+/*
+ * If memory encryption is supported, phys_to_dma will set the memory encryption
+ * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma
+ * and __dma_to_phys versions should only be used on non-encrypted memory for
+ * special occasions like DMA coherent buffers.
+ */
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return __sme_set(__phys_to_dma(dev, paddr));
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return __sme_clr(__dma_to_phys(dev, daddr));
+}
+
#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
void dma_mark_clean(void *addr, size_t size);
#else
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 005d1d87bb2e..8b06b4485e65 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
- phys_addr_t address)
-{
- return __sme_clr(phys_to_dma(hwdev, address));
-}
-
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
@@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
return SWIOTLB_MAP_ERROR;
}
- start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+ start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
dir, attrs);
}
@@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
goto out_warn;
phys_addr = swiotlb_tbl_map_single(dev,
- swiotlb_phys_to_dma(dev, io_tlb_start),
+ __phys_to_dma(dev, io_tlb_start),
0, size, DMA_FROM_DEVICE, 0);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
- *dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+ *dma_handle = __phys_to_dma(dev, phys_addr);
if (dma_coherent_ok(dev, *dma_handle, size))
goto out_unmap;
@@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
map = map_single(dev, phys, size, dir, attrs);
if (map == SWIOTLB_MAP_ERROR) {
swiotlb_full(dev, size, dir, 1);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
- dev_addr = swiotlb_phys_to_dma(dev, map);
+ dev_addr = __phys_to_dma(dev, map);
/* Ensure that the address returned is DMA'ble */
if (dma_capable(dev, dev_addr, size))
@@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
- return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+ return __phys_to_dma(dev, io_tlb_overflow_buffer);
}
/*
@@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sg_dma_len(sgl) = 0;
return 0;
}
- sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+ sg->dma_address = __phys_to_dma(hwdev, map);
} else
sg->dma_address = dev_addr;
sg_dma_len(sg) = sg->length;
@@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+ return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
/*
@@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+ return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
#ifdef CONFIG_DMA_DIRECT_OPS
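The split is easiest to see with the encryption mask made explicit: __phys_to_dma()/__dma_to_phys() do only the bus-address translation, and the unprefixed wrappers additionally set or clear the encryption bit. A toy user-space model with an invented mask value (the real sme_me_mask is discovered from the hardware at boot, and the raw translation here is a no-op stand-in):

#include <stdio.h>
#include <stdint.h>

/* Invented for illustration; the real mask is set up at boot. */
static const uint64_t sme_me_mask = 1ULL << 47;

static uint64_t __sme_set(uint64_t a) { return a | sme_me_mask; }
static uint64_t __sme_clr(uint64_t a) { return a & ~sme_me_mask; }

/* Raw translation: a no-op here, standing in for the arch version. */
static uint64_t __phys_to_dma(uint64_t paddr) { return paddr; }
static uint64_t __dma_to_phys(uint64_t daddr) { return daddr; }

/* The unprefixed helpers apply the encryption bit on top. */
static uint64_t phys_to_dma(uint64_t paddr)
{
	return __sme_set(__phys_to_dma(paddr));
}

static uint64_t dma_to_phys(uint64_t daddr)
{
	return __sme_clr(__dma_to_phys(daddr));
}

int main(void)
{
	uint64_t paddr = 0x1234000;
	uint64_t daddr = phys_to_dma(paddr);

	printf("phys %#llx -> dma %#llx -> phys %#llx\n",
	       (unsigned long long)paddr,
	       (unsigned long long)daddr,
	       (unsigned long long)dma_to_phys(daddr));
	return 0;
}

Code that hands out deliberately unencrypted memory, such as the swiotlb buffers above, uses the __-prefixed versions so the bit is never set in the first place.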
Commit-ID: c10f07aa27dadf5ab5b3d58c48c91a467f80db49
Gitweb: https://git.kernel.org/tip/c10f07aa27dadf5ab5b3d58c48c91a467f80db49
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:25 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:59 +0100
dma/direct: Handle force decryption for DMA coherent buffers in common code
With that in place, the generic DMA-direct routines can be used to
allocate non-encrypted bounce buffers, and the x86 SEV case can use
the generic swiotlb ops, including nice features such as CMA
allocations.
Note that I'm not too happy about using sev_active() in DMA-direct, but
I couldn't come up with a good enough name for a wrapper to make it
worth adding.
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 73 ++---------------------------------------------
lib/dma-direct.c | 32 +++++++++++++++++----
2 files changed, 29 insertions(+), 76 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1b396422d26f..b2de398d1fd3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -195,58 +195,6 @@ void __init sme_early_init(void)
swiotlb_force = SWIOTLB_FORCE;
}
-static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- unsigned int order;
- struct page *page;
- void *vaddr = NULL;
-
- order = get_order(size);
- page = alloc_pages_node(dev_to_node(dev), gfp, order);
- if (page) {
- dma_addr_t addr;
-
- /*
- * Since we will be clearing the encryption bit, check the
- * mask with it already cleared.
- */
- addr = __phys_to_dma(dev, page_to_phys(page));
- if ((addr + size) > dev->coherent_dma_mask) {
- __free_pages(page, get_order(size));
- } else {
- vaddr = page_address(page);
- *dma_handle = addr;
- }
- }
-
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
-
- if (!vaddr)
- return NULL;
-
- /* Clear the SME encryption bit for DMA use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
- set_memory_decrypted((unsigned long)vaddr, 1 << order);
- memset(vaddr, 0, PAGE_SIZE << order);
- *dma_handle = __sme_clr(*dma_handle);
- }
-
- return vaddr;
-}
-
-static void sev_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- /* Set the SME encryption bit for re-use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
- set_memory_encrypted((unsigned long)vaddr,
- 1 << get_order(size));
-
- swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-}
-
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
@@ -399,20 +347,6 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
-static const struct dma_map_ops sev_dma_ops = {
- .alloc = sev_alloc,
- .free = sev_free,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
-};
-
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
@@ -423,12 +357,11 @@ void __init mem_encrypt_init(void)
swiotlb_update_mem_attributes();
/*
- * With SEV, DMA operations cannot use encryption. New DMA ops
- * are required in order to mark the DMA areas as decrypted or
- * to use bounce buffers.
+ * With SEV, DMA operations cannot use encryption, so we need to use
+ * SWIOTLB to bounce-buffer DMA operations.
*/
if (sev_active())
- dma_ops = &sev_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
/*
* With SEV, we need to unroll the rep string I/O instructions.
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c9e8e21cb334..1277d293d4da 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/pfn.h>
+#include <linux/set_memory.h>
#define DIRECT_MAPPING_ERROR 0
@@ -20,6 +21,14 @@
#define ARCH_ZONE_DMA_BITS 24
#endif
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+ return sev_active();
+}
+
static bool
check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
- return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+ dma_addr_t addr = force_dma_unencrypted() ?
+ __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+ return addr + size - 1 <= dev->coherent_dma_mask;
}
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
+ void *ret;
/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,10 +90,15 @@ again:
if (!page)
return NULL;
-
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- memset(page_address(page), 0, size);
- return page_address(page);
+ ret = page_address(page);
+ if (force_dma_unencrypted()) {
+ set_memory_decrypted((unsigned long)ret, 1 << page_order);
+ *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+ } else {
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ }
+ memset(ret, 0, size);
+ return ret;
}
/*
@@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned int page_order = get_order(size);
+ if (force_dma_unencrypted())
+ set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
- free_pages((unsigned long)cpu_addr, get_order(size));
+ free_pages((unsigned long)cpu_addr, page_order);
}
static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
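The interesting case is the mask check: when DMA must be unencrypted, the address handed to the device does not carry the encryption bit, so the mask has to be checked against the raw address. A self-contained user-space model of that check, with an invented mask value and sev_active() reduced to a constant stand-in:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static const uint64_t sme_me_mask = 1ULL << 47;	/* invented value */

static uint64_t __phys_to_dma(uint64_t paddr) { return paddr; }
static uint64_t phys_to_dma(uint64_t paddr)
{
	return __phys_to_dma(paddr) | sme_me_mask;
}

/* Stand-in for sev_active(); toggle to see both behaviors. */
static bool force_dma_unencrypted(void) { return true; }

/*
 * Model of dma_coherent_ok(): with forced decryption the device sees
 * the raw address, so check the mask against that, not against the
 * address with the encryption bit set.
 */
static bool dma_coherent_ok(uint64_t phys, size_t size, uint64_t mask)
{
	uint64_t addr = force_dma_unencrypted() ?
		__phys_to_dma(phys) : phys_to_dma(phys);

	return addr + size - 1 <= mask;
}

int main(void)
{
	uint64_t mask32 = (1ULL << 32) - 1;

	printf("fits 32-bit mask: %d\n",
	       dma_coherent_ok(0x1000000, 4096, mask32));
	return 0;
}

With the stand-in returning true the raw address fits a 32-bit mask; with the encrypted wrapper it would not, which is exactly why the allocation path pairs set_memory_decrypted() with __phys_to_dma().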
Commit-ID: 16e73adbca76fd18733278cb688b0ddb4cad162c
Gitweb: https://git.kernel.org/tip/16e73adbca76fd18733278cb688b0ddb4cad162c
Author: Christoph Hellwig <[email protected]>
AuthorDate: Mon, 19 Mar 2018 11:38:26 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Mar 2018 10:01:59 +0100
dma/swiotlb: Remove swiotlb_{alloc,free}_coherent()
Unused now that everyone uses swiotlb_{alloc,free}().
Tested-by: Tom Lendacky <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Muli Ben-Yehuda <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/swiotlb.h | 8 --------
lib/swiotlb.c | 38 --------------------------------------
2 files changed, 46 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5b1f2a00491c..965be92c33b5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
void swiotlb_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs);
-extern void
-*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags);
-
-extern void
-swiotlb_free_coherent(struct device *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8b06b4485e65..15954b86f09e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
- volatile void *address)
-{
- return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
static bool no_iotlb_memory;
void swiotlb_print_info(void)
@@ -752,28 +745,6 @@ out_warn:
return NULL;
}
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
-{
- int order = get_order(size);
- unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
- void *ret;
-
- ret = (void *)__get_free_pages(flags, order);
- if (ret) {
- *dma_handle = swiotlb_virt_to_bus(hwdev, ret);
- if (dma_coherent_ok(hwdev, *dma_handle, size)) {
- memset(ret, 0, size);
- return ret;
- }
- free_pages((unsigned long)ret, order);
- }
-
- return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
static bool swiotlb_free_buffer(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
@@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
return true;
}
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dev_addr)
-{
- if (!swiotlb_free_buffer(hwdev, size, dev_addr))
- free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
int do_panic)