Hi all,
this patch series introduces support for GNTTABOP_cache_flush to perform
cache maintenance operations on foreign pages and removes the current
code based on XENFEAT_grant_map_identity.
It also provides a very slow fallback by bouncing on the swiotlb buffer,
in case the hypercall is not available.
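In short, for a dma map/unmap that involves a foreign page and a
non-coherent device, dom0 ends up doing roughly the following
(illustrative sketch only, with a made-up helper name; the real code is
in patches 6 and 7):

/* Illustrative sketch; the helper name is made up and the real logic
 * lives in dma_cache_maint and xen_arch_need_swiotlb (patches 6-7). */
static void xen_dma_maint_sketch(struct device *dev, unsigned long pfn,
                                 unsigned long mfn, unsigned long offset,
                                 size_t len, enum dma_data_direction dir)
{
        if (pfn == mfn || is_dma_coherent(dev))
                return; /* local page or coherent device: native path suffices */

        if (hypercall_flush) {
                /* ask Xen to clean/invalidate the foreign page on our behalf */
                struct gnttab_cache_flush cflush = {
                        .a.dev_bus_addr = (uint64_t)mfn << PAGE_SHIFT,
                        .offset = offset,
                        .length = len,
                        .op = (dir == DMA_FROM_DEVICE) ? GNTTAB_CACHE_INVAL
                                                       : GNTTAB_CACHE_CLEAN,
                };
                HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
        } else {
                /* very slow fallback: bounce the buffer on the swiotlb */
        }
}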
Changes in v4:
- remove outer_*_range call;
- introduce is_dma_coherent;
- use is_dma_coherent in arch/arm/xen/mm32.c;
- merge xen/mm32.c into xen/mm.c;
- xen/arm/arm64: introduce xen_arch_need_swiotlb;
- avoid bouncing dma map operations that involve foreign grants and
non-coherent devices if GNTTABOP_cache_flush is provided by Xen.
Changes in v3:
- fix the cache maintenance op call to match what Linux does natively;
- update the hypercall interface to match Xen.
Changes in v2:
- remove the definition of XENFEAT_grant_map_identity;
- update the hypercall interface to match Xen;
- call the interface on a single page at a time.
Stefano Stabellini (7):
xen/arm: remove handling of XENFEAT_grant_map_identity
xen/arm: remove outer_*_range call
[RFC] arm/arm64: introduce is_dma_coherent
xen/arm: use is_dma_coherent
xen/arm/arm64: merge xen/mm32.c into xen/mm.c
xen/arm/arm64: introduce xen_arch_need_swiotlb
xen/arm: introduce GNTTABOP_cache_flush
arch/arm/include/asm/dma-mapping.h | 6 +
arch/arm/include/asm/xen/page.h | 4 +
arch/arm/xen/Makefile | 2 +-
arch/arm/xen/enlighten.c | 5 -
arch/arm/xen/mm.c | 161 +++++++++++++++++++++-
arch/arm/xen/mm32.c | 202 ----------------------------
arch/arm64/include/asm/dma-mapping.h | 5 +
arch/arm64/include/asm/xen/page-coherent.h | 44 +-----
arch/x86/include/asm/xen/page.h | 7 +
drivers/xen/swiotlb-xen.c | 5 +-
include/xen/interface/features.h | 3 -
include/xen/interface/grant_table.h | 19 +++
12 files changed, 207 insertions(+), 256 deletions(-)
delete mode 100644 arch/arm/xen/mm32.c
Dom0 is not actually capable of issuing outer_inv_range or
outer_clean_range calls.
Signed-off-by: Stefano Stabellini <[email protected]>
---
arch/arm/xen/mm32.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index a5a93fc..6153d61 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -61,9 +61,6 @@ static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
/* Cannot use __dma_page_dev_to_cpu because we don't have a
* struct page for handle */
- if (dir != DMA_TO_DEVICE)
- outer_inv_range(handle, handle + size);
-
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
}
@@ -72,12 +69,6 @@ static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
-
- if (dir == DMA_FROM_DEVICE) {
- outer_inv_range(handle, handle + size);
- } else {
- outer_clean_range(handle, handle + size);
- }
}
void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
--
1.7.10.4
Introduce support for new hypercall GNTTABOP_cache_flush.
Use it to perform cache flushing on pages used for dma when necessary.
If GNTTABOP_cache_flush is supported by the hypervisor, we don't need to
bounce dma map operations that involve foreign grants and non-coherent
devices.
Signed-off-by: Stefano Stabellini <[email protected]>
---
Changes in v4:
- add comment;
- avoid bouncing dma map operations that involve foreign grants and
non-coherent devices if GNTTABOP_cache_flush is provided by Xen.
Changes in v3:
- fix the cache maintenance op call to match what Linux does natively;
- update the hypercall interface to match Xen.
Changes in v2:
- update the hypercall interface to match Xen;
- call the interface on a single page at a time.
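
Note that the new interface can also identify the target page by grant
reference instead of bus address, by setting GNTTAB_CACHE_SOURCE_GREF in
op. This patch only uses dev_bus_addr; a hypothetical caller flushing by
reference would look roughly like this ("gref" is an assumed grant_ref_t):

/* Hypothetical example: this patch always passes a.dev_bus_addr.
 * "gref" is an assumed grant_ref_t for a page granted to this domain. */
struct gnttab_cache_flush cflush = {
        .a.ref  = gref,
        .offset = 0,
        .length = PAGE_SIZE,
        .op     = GNTTAB_CACHE_CLEAN | GNTTAB_CACHE_SOURCE_GREF,
};

HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);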
---
arch/arm/xen/mm.c | 41 ++++++++++++++++++++++++++++++-----
include/xen/interface/grant_table.h | 19 ++++++++++++++++
2 files changed, 54 insertions(+), 6 deletions(-)
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 0c2a75a..21db123 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -11,6 +11,7 @@
#include <linux/swiotlb.h>
#include <xen/xen.h>
+#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/swiotlb-xen.h>
@@ -44,6 +45,8 @@ static inline void *kmap_high_get(struct page *page)
static inline void kunmap_high(struct page *page) {}
#endif
+static bool hypercall_flush = false;
+
/* functions called by SWIOTLB */
@@ -60,17 +63,35 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
do {
size_t len = left;
void *vaddr;
+
+ /* buffers in highmem or foreign pages cannot cross page
+ * boundaries */
+ if (len + offset > PAGE_SIZE)
+ len = PAGE_SIZE - offset;
if (!pfn_valid(pfn))
{
- /* TODO: cache flush */
+ struct gnttab_cache_flush cflush;
+
+ cflush.op = 0;
+ cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
+ cflush.offset = offset;
+ cflush.length = len;
+
+ if (op == dmac_unmap_area && dir != DMA_TO_DEVICE)
+ cflush.op = GNTTAB_CACHE_INVAL;
+ if (op == dmac_map_area) {
+ if (dir == DMA_FROM_DEVICE)
+ cflush.op = GNTTAB_CACHE_INVAL;
+ else
+ cflush.op = GNTTAB_CACHE_CLEAN;
+ }
+ if (cflush.op)
+ HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
} else {
struct page *page = pfn_to_page(pfn);
if (PageHighMem(page)) {
- if (len + offset > PAGE_SIZE)
- len = PAGE_SIZE - offset;
-
if (cache_is_vipt_nonaliasing()) {
vaddr = kmap_atomic(page);
op(vaddr + offset, len, dir);
@@ -143,7 +164,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn)
{
- return ((pfn != mfn) && !is_dma_coherent(dev));
+ return (!hypercall_flush && (pfn != mfn) && !is_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
@@ -186,10 +207,18 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
int __init xen_mm_init(void)
{
+ struct gnttab_cache_flush cflush;
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
xen_dma_ops = &xen_swiotlb_dma_ops;
+
+ cflush.op = 0;
+ cflush.a.dev_bus_addr = 0;
+ cflush.offset = 0;
+ cflush.length = 0;
+ if (HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1) != -ENOSYS)
+ hypercall_flush = true;
return 0;
}
-arch_initcall(xen_mm_init);
+arch_initcall(xen_mm_init)
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index e40fae9..bcce564 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -479,6 +479,25 @@ struct gnttab_get_version {
DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+#define GNTTABOP_cache_flush 12
+struct gnttab_cache_flush {
+ union {
+ uint64_t dev_bus_addr;
+ grant_ref_t ref;
+ } a;
+ uint16_t offset; /* offset from start of grant */
+ uint16_t length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN (1<<0)
+#define GNTTAB_CACHE_INVAL (1<<1)
+#define GNTTAB_CACHE_SOURCE_GREF (1<<31)
+ uint32_t op;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
+
+/*
* Bitfield values for update_pin_status.flags.
*/
/* Map the grant entry for access by I/O devices. */
--
1.7.10.4
The feature has been removed from Xen. Also Linux cannot use it on ARM32
without CONFIG_ARM_LPAE.
Signed-off-by: Stefano Stabellini <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
Changes in v2:
- remove the definition of XENFEAT_grant_map_identity.
---
arch/arm/xen/enlighten.c | 5 ---
arch/arm/xen/mm32.c | 85 +-------------------------------------
include/xen/interface/features.h | 3 --
3 files changed, 1 insertion(+), 92 deletions(-)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 0e15f01..c7ca936 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -261,11 +261,6 @@ static int __init xen_guest_init(void)
xen_setup_features();
- if (!xen_feature(XENFEAT_grant_map_identity)) {
- pr_warn("Please upgrade your Xen.\n"
- "If your platform has any non-coherent DMA devices, they won't work properly.\n");
- }
-
if (xen_feature(XENFEAT_dom0))
xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
else
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index 3b99860..a5a93fc 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -5,70 +5,6 @@
#include <xen/features.h>
-static DEFINE_PER_CPU(unsigned long, xen_mm32_scratch_virt);
-static DEFINE_PER_CPU(pte_t *, xen_mm32_scratch_ptep);
-
-static int alloc_xen_mm32_scratch_page(int cpu)
-{
- struct page *page;
- unsigned long virt;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (per_cpu(xen_mm32_scratch_ptep, cpu) != NULL)
- return 0;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL) {
- pr_warn("Failed to allocate xen_mm32_scratch_page for cpu %d\n", cpu);
- return -ENOMEM;
- }
-
- virt = (unsigned long)__va(page_to_phys(page));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
- ptep = pte_offset_kernel(pmdp, virt);
-
- per_cpu(xen_mm32_scratch_virt, cpu) = virt;
- per_cpu(xen_mm32_scratch_ptep, cpu) = ptep;
-
- return 0;
-}
-
-static int xen_mm32_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
- switch (action) {
- case CPU_UP_PREPARE:
- if (alloc_xen_mm32_scratch_page(cpu))
- return NOTIFY_BAD;
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block xen_mm32_cpu_notifier = {
- .notifier_call = xen_mm32_cpu_notify,
-};
-
-static void* xen_mm32_remap_page(dma_addr_t handle)
-{
- unsigned long virt = get_cpu_var(xen_mm32_scratch_virt);
- pte_t *ptep = __get_cpu_var(xen_mm32_scratch_ptep);
-
- *ptep = pfn_pte(handle >> PAGE_SHIFT, PAGE_KERNEL);
- local_flush_tlb_kernel_page(virt);
-
- return (void*)virt;
-}
-
-static void xen_mm32_unmap(void *vaddr)
-{
- put_cpu_var(xen_mm32_scratch_virt);
-}
-
/* functions called by SWIOTLB */
@@ -88,13 +24,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
if (!pfn_valid(pfn))
{
- /* Cannot map the page, we don't know its physical address.
- * Return and hope for the best */
- if (!xen_feature(XENFEAT_grant_map_identity))
- return;
- vaddr = xen_mm32_remap_page(handle) + offset;
- op(vaddr, len, dir);
- xen_mm32_unmap(vaddr - offset);
+ /* TODO: cache flush */
} else {
struct page *page = pfn_to_page(pfn);
@@ -181,22 +111,9 @@ void xen_dma_sync_single_for_device(struct device *hwdev,
int __init xen_mm32_init(void)
{
- int cpu;
-
if (!xen_initial_domain())
return 0;
- register_cpu_notifier(&xen_mm32_cpu_notifier);
- get_online_cpus();
- for_each_online_cpu(cpu) {
- if (alloc_xen_mm32_scratch_page(cpu)) {
- put_online_cpus();
- unregister_cpu_notifier(&xen_mm32_cpu_notifier);
- return -ENOMEM;
- }
- }
- put_online_cpus();
-
return 0;
}
arch_initcall(xen_mm32_init);
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 14334d0..131a6cc 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -53,9 +53,6 @@
/* operation as Dom0 is supported */
#define XENFEAT_dom0 11
-/* Xen also maps grant references at pfn = mfn */
-#define XENFEAT_grant_map_identity 12
-
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
--
1.7.10.4
Merge xen/mm32.c into xen/mm.c.
As a consequence the code gets compiled on arm64 too: introduce a few
compat functions to actually be able to compile it.
Signed-off-by: Stefano Stabellini <[email protected]>
---
arch/arm/xen/Makefile | 2 +-
arch/arm/xen/mm.c | 123 ++++++++++++++++++++++++++++
arch/arm/xen/mm32.c | 110 -------------------------
arch/arm64/include/asm/xen/page-coherent.h | 44 +---------
4 files changed, 125 insertions(+), 154 deletions(-)
delete mode 100644 arch/arm/xen/mm32.c
diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile
index 1f85bfe..1296952 100644
--- a/arch/arm/xen/Makefile
+++ b/arch/arm/xen/Makefile
@@ -1 +1 @@
-obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o mm32.o
+obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index b0e77de..9d460e0 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,5 +1,8 @@
+#include <linux/cpu.h>
+#include <linux/dma-mapping.h>
#include <linux/bootmem.h>
#include <linux/gfp.h>
+#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -16,6 +19,126 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
+
+#ifdef CONFIG_ARM64
+static inline void dmac_map_area(const void *start, size_t size, int dir)
+{
+ return __dma_map_area(start, size, dir);
+}
+
+static inline void dmac_unmap_area(const void *start, size_t size, int dir)
+{
+ return __dma_unmap_area(start, size, dir);
+}
+
+static inline bool cache_is_vipt_nonaliasing(void)
+{
+ return true;
+}
+
+static inline void *kmap_high_get(struct page *page)
+{
+ return NULL;
+}
+
+static inline void kunmap_high(struct page *page) {}
+#endif
+
+
+/* functions called by SWIOTLB */
+
+static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
+ size_t size, enum dma_data_direction dir,
+ void (*op)(const void *, size_t, int))
+{
+ unsigned long pfn;
+ size_t left = size;
+
+ pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+
+ do {
+ size_t len = left;
+ void *vaddr;
+
+ if (!pfn_valid(pfn))
+ {
+ /* TODO: cache flush */
+ } else {
+ struct page *page = pfn_to_page(pfn);
+
+ if (PageHighMem(page)) {
+ if (len + offset > PAGE_SIZE)
+ len = PAGE_SIZE - offset;
+
+ if (cache_is_vipt_nonaliasing()) {
+ vaddr = kmap_atomic(page);
+ op(vaddr + offset, len, dir);
+ kunmap_atomic(vaddr);
+ } else {
+ vaddr = kmap_high_get(page);
+ if (vaddr) {
+ op(vaddr + offset, len, dir);
+ kunmap_high(page);
+ }
+ }
+ } else {
+ vaddr = page_address(page) + offset;
+ op(vaddr, len, dir);
+ }
+ }
+
+ offset = 0;
+ pfn++;
+ left -= len;
+ } while (left);
+}
+
+static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ /* Cannot use __dma_page_dev_to_cpu because we don't have a
+ * struct page for handle */
+
+ dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
+}
+
+static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+
+ dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
+}
+
+void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+{
+ if (is_dma_coherent(hwdev))
+ return;
+ if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ return;
+
+ __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+ if (is_dma_coherent(hwdev))
+ return;
+ __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_device(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+ if (is_dma_coherent(hwdev))
+ return;
+ __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
+}
+
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
deleted file mode 100644
index c7fa673..0000000
--- a/arch/arm/xen/mm32.c
+++ /dev/null
@@ -1,110 +0,0 @@
-#include <linux/cpu.h>
-#include <linux/dma-mapping.h>
-#include <linux/gfp.h>
-#include <linux/highmem.h>
-
-#include <xen/features.h>
-
-
-/* functions called by SWIOTLB */
-
-static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
- size_t size, enum dma_data_direction dir,
- void (*op)(const void *, size_t, int))
-{
- unsigned long pfn;
- size_t left = size;
-
- pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
- offset %= PAGE_SIZE;
-
- do {
- size_t len = left;
- void *vaddr;
-
- if (!pfn_valid(pfn))
- {
- /* TODO: cache flush */
- } else {
- struct page *page = pfn_to_page(pfn);
-
- if (PageHighMem(page)) {
- if (len + offset > PAGE_SIZE)
- len = PAGE_SIZE - offset;
-
- if (cache_is_vipt_nonaliasing()) {
- vaddr = kmap_atomic(page);
- op(vaddr + offset, len, dir);
- kunmap_atomic(vaddr);
- } else {
- vaddr = kmap_high_get(page);
- if (vaddr) {
- op(vaddr + offset, len, dir);
- kunmap_high(page);
- }
- }
- } else {
- vaddr = page_address(page) + offset;
- op(vaddr, len, dir);
- }
- }
-
- offset = 0;
- pfn++;
- left -= len;
- } while (left);
-}
-
-static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
- /* Cannot use __dma_page_dev_to_cpu because we don't have a
- * struct page for handle */
-
- dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
-}
-
-static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
-
- dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
-}
-
-void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
-
-{
- if (is_dma_coherent(hwdev))
- return;
- if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
- return;
-
- __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- if (is_dma_coherent(hwdev))
- return;
- __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- if (is_dma_coherent(hwdev))
- return;
- __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
-}
-
-int __init xen_mm32_init(void)
-{
- if (!xen_initial_domain())
- return 0;
-
- return 0;
-}
-arch_initcall(xen_mm32_init);
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
index dde3fc9..2052102 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -1,43 +1 @@
-#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
-#define _ASM_ARM64_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-attrs.h>
-#include <linux/dma-mapping.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
-{
- return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
-{
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
-{
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-}
-#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
+#include <../../arm/include/asm/xen/page-coherent.h>
--
1.7.10.4
Introduce an arch specific function to find out whether a particular dma
mapping operation needs to bounce on the swiotlb buffer.
On ARM and ARM64, if the page involved is a foreign page and the device
is not coherent, we need to bounce because at unmap time we cannot
execute any required cache maintenance operations (we don't know how to
find the pfn from the mfn).
No change of behaviour for x86.
Signed-off-by: Stefano Stabellini <[email protected]>
---
arch/arm/include/asm/xen/page.h | 4 ++++
arch/arm/xen/mm.c | 7 +++++++
arch/x86/include/asm/xen/page.h | 7 +++++++
drivers/xen/swiotlb-xen.c | 5 ++++-
4 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 135c24a..fb7839d 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
#define xen_unmap(cookie) iounmap((cookie))
+bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn);
+
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 9d460e0..0c2a75a 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -139,6 +139,13 @@ void xen_dma_sync_single_for_device(struct device *hwdev,
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}
+bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn)
+{
+ return ((pfn != mfn) && !is_dma_coherent(dev));
+}
+
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c949923..0a45242 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_remap(cookie, size) ioremap((cookie), (size));
#define xen_unmap(cookie) iounmap((cookie))
+static inline bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn)
+{
+ return false;
+}
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ebd8f21..ac0bd60 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -399,7 +399,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* buffering it.
*/
if (dma_capable(dev, dev_addr, size) &&
- !range_straddles_page_boundary(phys, size) && !swiotlb_force) {
+ !range_straddles_page_boundary(phys, size) &&
+ !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) &&
+ !swiotlb_force) {
/* we are not interested in the dma_addr returned by
* xen_dma_map_page, only in the potential cache flushes executed
* by the function. */
@@ -557,6 +559,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
dma_addr_t dev_addr = xen_phys_to_bus(paddr);
if (swiotlb_force ||
+ xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) ||
!dma_capable(hwdev, dev_addr, sg->length) ||
range_straddles_page_boundary(paddr, sg->length)) {
phys_addr_t map = swiotlb_tbl_map_single(hwdev,
--
1.7.10.4
Introduce a function to check whether a device is dma coherent.
Signed-off-by: Stefano Stabellini <[email protected]>
CC: [email protected]
CC: [email protected]
CC: [email protected]
CC: [email protected]
---
arch/arm/include/asm/dma-mapping.h | 6 ++++++
arch/arm64/include/asm/dma-mapping.h | 5 +++++
2 files changed, 11 insertions(+)
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index c45b61a..bededbb 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
set_dma_ops(dev, &arm_coherent_dma_ops);
return 0;
}
+
+static inline bool is_dma_coherent(struct device *dev)
+{
+ return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
+}
+
#define set_arch_dma_coherent_ops(dev) set_arch_dma_coherent_ops(dev)
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index dc82e52..d51d67e 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -52,6 +52,11 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
dev->archdata.dma_ops = ops;
}
+static inline bool is_dma_coherent(struct device *dev)
+{
+ return (__generic_dma_ops(dev) == &coherent_swiotlb_dma_ops);
+}
+
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
--
1.7.10.4
Use is_dma_coherent to check whether we need to issue cache maintenance
operations rather than checking on the existence of a particular
dma_ops function for the device.
Signed-off-by: Stefano Stabellini <[email protected]>
---
arch/arm/xen/mm32.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index 6153d61..c7fa673 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -76,7 +76,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
struct dma_attrs *attrs)
{
- if (!__generic_dma_ops(hwdev)->unmap_page)
+ if (is_dma_coherent(hwdev))
return;
if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
return;
@@ -87,7 +87,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
- if (!__generic_dma_ops(hwdev)->sync_single_for_cpu)
+ if (is_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
@@ -95,7 +95,7 @@ void xen_dma_sync_single_for_cpu(struct device *hwdev,
void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
- if (!__generic_dma_ops(hwdev)->sync_single_for_device)
+ if (is_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}
--
1.7.10.4
On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> Introduce a function to check whether a device is dma coherent.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> CC: [email protected]
> ---
> arch/arm/include/asm/dma-mapping.h | 6 ++++++
> arch/arm64/include/asm/dma-mapping.h | 5 +++++
> 2 files changed, 11 insertions(+)
>
> diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> index c45b61a..bededbb 100644
> --- a/arch/arm/include/asm/dma-mapping.h
> +++ b/arch/arm/include/asm/dma-mapping.h
> @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> set_dma_ops(dev, &arm_coherent_dma_ops);
> return 0;
> }
> +
> +static inline bool is_dma_coherent(struct device *dev)
> +{
> + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> +}
Hmm, what about the IOMMU ops?
Will
On Fri, 10 Oct 2014, Will Deacon wrote:
> On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > Introduce a function to check whether a device is dma coherent.
> >
> > Signed-off-by: Stefano Stabellini <[email protected]>
> > CC: [email protected]
> > CC: [email protected]
> > CC: [email protected]
> > CC: [email protected]
> > ---
> > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > 2 files changed, 11 insertions(+)
> >
> > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > index c45b61a..bededbb 100644
> > --- a/arch/arm/include/asm/dma-mapping.h
> > +++ b/arch/arm/include/asm/dma-mapping.h
> > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > set_dma_ops(dev, &arm_coherent_dma_ops);
> > return 0;
> > }
> > +
> > +static inline bool is_dma_coherent(struct device *dev)
> > +{
> > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > +}
>
> Hmm, what about the IOMMU ops?
Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
Do you have any better suggestions?
On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> On Fri, 10 Oct 2014, Will Deacon wrote:
> > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > Introduce a function to check whether a device is dma coherent.
> > >
> > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > CC: [email protected]
> > > CC: [email protected]
> > > CC: [email protected]
> > > CC: [email protected]
> > > ---
> > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > 2 files changed, 11 insertions(+)
> > >
> > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > index c45b61a..bededbb 100644
> > > --- a/arch/arm/include/asm/dma-mapping.h
> > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > return 0;
> > > }
> > > +
> > > +static inline bool is_dma_coherent(struct device *dev)
> > > +{
> > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > +}
> >
> > Hmm, what about the IOMMU ops?
>
> Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> Do you have any better suggestions?
ping?
On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > Introduce a function to check whether a device is dma coherent.
> > > >
> > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > CC: [email protected]
> > > > CC: [email protected]
> > > > CC: [email protected]
> > > > CC: [email protected]
> > > > ---
> > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > 2 files changed, 11 insertions(+)
> > > >
> > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > index c45b61a..bededbb 100644
> > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > return 0;
> > > > }
> > > > +
> > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > +{
> > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > +}
> > >
> > > Hmm, what about the IOMMU ops?
> >
> > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > Do you have any better suggestions?
>
> ping?
Without any clear idea about why this is needed or how it's used, I don't
have any better ideas.
Will
On Mon, 13 Oct 2014, Will Deacon wrote:
> On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> > On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > > Introduce a function to check whether a device is dma coherent.
> > > > >
> > > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > > CC: [email protected]
> > > > > CC: [email protected]
> > > > > CC: [email protected]
> > > > > CC: [email protected]
> > > > > ---
> > > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > > 2 files changed, 11 insertions(+)
> > > > >
> > > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > > index c45b61a..bededbb 100644
> > > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > > return 0;
> > > > > }
> > > > > +
> > > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > > +{
> > > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > > +}
> > > >
> > > > Hmm, what about the IOMMU ops?
> > >
> > > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > > Do you have any better suggestions?
> >
> > ping?
>
> Without any clear idea about why this is needed or how it's used, I don't
> have any better ideas.
Thanks for the quick reply.
It is used in dom0 to figure out whether the device is not coherent: in
that case Dom0 is going to issue a hypercall and Xen is going to
execute the required cache maintenance operations on Dom0's behalf.
In practice for this use case iommu_dma_ops is not a problem, as the
device is never going to appear as being behind an SMMU in dom0.
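Concretely, in this series the check ends up looking like this
(simplified from patches 6 and 7):

bool xen_arch_need_swiotlb(struct device *dev,
                           unsigned long pfn,
                           unsigned long mfn)
{
        /* Bounce only if the page is foreign (pfn != mfn), the device is
         * not dma coherent and Xen cannot flush the cache on our behalf
         * via GNTTABOP_cache_flush. */
        return (!hypercall_flush && (pfn != mfn) && !is_dma_coherent(dev));
}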
On Fri, Oct 10, 2014 at 12:51:47PM +0100, Stefano Stabellini wrote:
> Introduce an arch specific function to find out whether a particular dma
> mapping operation needs to bounce on the swiotlb buffer.
>
> On ARM and ARM64, if the page involved is a foreign page and the device
> is not coherent, we need to bounce because at unmap time we cannot
> execute any required cache maintenance operations (we don't know how to
> find the pfn from the mfn).
>
> No change of behaviour for x86.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
> ---
> arch/arm/include/asm/xen/page.h | 4 ++++
> arch/arm/xen/mm.c | 7 +++++++
> arch/x86/include/asm/xen/page.h | 7 +++++++
> drivers/xen/swiotlb-xen.c | 5 ++++-
> 4 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
> index 135c24a..fb7839d 100644
> --- a/arch/arm/include/asm/xen/page.h
> +++ b/arch/arm/include/asm/xen/page.h
> @@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> #define xen_remap(cookie, size) ioremap_cache((cookie), (size))
> #define xen_unmap(cookie) iounmap((cookie))
>
> +bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn);
> +
> #endif /* _ASM_ARM_XEN_PAGE_H */
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 9d460e0..0c2a75a 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -139,6 +139,13 @@ void xen_dma_sync_single_for_device(struct device *hwdev,
> __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
> }
>
> +bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn)
> +{
> + return ((pfn != mfn) && !is_dma_coherent(dev));
> +}
> +
> int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
> unsigned int address_bits,
> dma_addr_t *dma_handle)
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index c949923..0a45242 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
> #define xen_remap(cookie, size) ioremap((cookie), (size));
> #define xen_unmap(cookie) iounmap((cookie))
>
> +static inline bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn)
Something funny happened here.
> +{
> + return false;
> +}
Why not make this a macro?
xen_unmap and xen_remap are.
> +
> #endif /* _ASM_X86_XEN_PAGE_H */
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index ebd8f21..ac0bd60 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -399,7 +399,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
> * buffering it.
> */
> if (dma_capable(dev, dev_addr, size) &&
> - !range_straddles_page_boundary(phys, size) && !swiotlb_force) {
> + !range_straddles_page_boundary(phys, size) &&
> + !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) &&
> + !swiotlb_force) {
> /* we are not interested in the dma_addr returned by
> * xen_dma_map_page, only in the potential cache flushes executed
> * by the function. */
> @@ -557,6 +559,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
> dma_addr_t dev_addr = xen_phys_to_bus(paddr);
>
> if (swiotlb_force ||
> + xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) ||
> !dma_capable(hwdev, dev_addr, sg->length) ||
> range_straddles_page_boundary(paddr, sg->length)) {
> phys_addr_t map = swiotlb_tbl_map_single(hwdev,
> --
> 1.7.10.4
>
On 14/10/14 17:03, Konrad Rzeszutek Wilk wrote:
> On Fri, Oct 10, 2014 at 12:51:47PM +0100, Stefano Stabellini wrote:
>> Introduce an arch specific function to find out whether a particular dma
>> mapping operation needs to bounce on the swiotlb buffer.
>>
>> On ARM and ARM64, if the page involved is a foreign page and the device
>> is not coherent, we need to bounce because at unmap time we cannot
>> execute any required cache maintenance operations (we don't know how to
>> find the pfn from the mfn).
...
>> --- a/arch/x86/include/asm/xen/page.h
>> +++ b/arch/x86/include/asm/xen/page.h
>> @@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
>> #define xen_remap(cookie, size) ioremap((cookie), (size));
>> #define xen_unmap(cookie) iounmap((cookie))
>>
>> +static inline bool xen_arch_need_swiotlb(struct device *dev,
>> + unsigned long pfn,
>> + unsigned long mfn)
>> +{
>> + return false;
>> +}
>
> Why not make this a macro?
Because macros are evil and inline functions are preferred.
David
On Tue, Oct 14, 2014 at 05:12:12PM +0100, David Vrabel wrote:
> On 14/10/14 17:03, Konrad Rzeszutek Wilk wrote:
> > On Fri, Oct 10, 2014 at 12:51:47PM +0100, Stefano Stabellini wrote:
> >> Introduce an arch specific function to find out whether a particular dma
> >> mapping operation needs to bounce on the swiotlb buffer.
> >>
> >> On ARM and ARM64, if the page involved is a foreign page and the device
> >> is not coherent, we need to bounce because at unmap time we cannot
> >> execute any required cache maintenance operations (we don't know how to
> >> find the pfn from the mfn).
> ...
> >> --- a/arch/x86/include/asm/xen/page.h
> >> +++ b/arch/x86/include/asm/xen/page.h
> >> @@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
> >> #define xen_remap(cookie, size) ioremap((cookie), (size));
> >> #define xen_unmap(cookie) iounmap((cookie))
> >>
> >> +static inline bool xen_arch_need_swiotlb(struct device *dev,
> >> + unsigned long pfn,
> >> + unsigned long mfn)
> >> +{
> >> + return false;
> >> +}
> >
> > Why not make this a macro?
>
> Because macros are evil and inline functions are preferred.
<laughs>
Fair enough.
>
> David
On Fri, 2014-10-10 at 12:51 +0100, Stefano Stabellini wrote:
> The feature has been removed from Xen. Also Linux cannot use it on ARM32
> without CONFIG_ARM_LPAE.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
> Reviewed-by: David Vrabel <[email protected]>
Acked-by: Ian Campbell <[email protected]>
On Fri, 2014-10-10 at 12:51 +0100, Stefano Stabellini wrote:
> Use is_dma_coherent to check whether we need to issue cache maintenance
> operations rather than checking on the existence of a particular
> dma_ops function for the device.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Ian Campbell <[email protected]>
On Fri, 2014-10-10 at 12:51 +0100, Stefano Stabellini wrote:
> Merge xen/mm32.c into xen/mm.c.
> As a consequence the code gets compiled on arm64 too: introduce a few
> compat functions to actually be able to compile it.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Ian Campbell <[email protected]>
On Fri, 2014-10-10 at 12:51 +0100, Stefano Stabellini wrote:
> Introduce support for new hypercall GNTTABOP_cache_flush.
> Use it to perform cache flushing on pages used for dma when necessary.
>
> If GNTTABOP_cache_flush is supported by the hypervisor, we don't need to
> bounce dma map operations that involve foreign grants and non-coherent
> devices.
>
> Signed-off-by: Stefano Stabellini <[email protected]>
>
> ---
>
> Changes in v4:
> - add comment;
> - avoid bouncing dma map operations that involve foreign grants and
> non-coherent devices if GNTTABOP_cache_flush is provided by Xen.
>
> Changes in v3:
> - fix the cache maintenance op call to match what Linux does natively;
> - update the hypercall interface to match Xen.
>
> Changes in v2:
> - update the hypercall interface to match Xen;
> - call the interface on a single page at a time.
> ---
> arch/arm/xen/mm.c | 41 ++++++++++++++++++++++++++++++-----
> include/xen/interface/grant_table.h | 19 ++++++++++++++++
> 2 files changed, 54 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 0c2a75a..21db123 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -11,6 +11,7 @@
> #include <linux/swiotlb.h>
>
> #include <xen/xen.h>
> +#include <xen/interface/grant_table.h>
> #include <xen/interface/memory.h>
> #include <xen/swiotlb-xen.h>
>
> @@ -44,6 +45,8 @@ static inline void *kmap_high_get(struct page *page)
> static inline void kunmap_high(struct page *page) {}
> #endif
>
> +static bool hypercall_flush = false;
Would be nice to include the word "cache" (or at least cflush) in this.
> -arch_initcall(xen_mm_init);
> +arch_initcall(xen_mm_init)
I think this is stray?
Acked-by: Ian Campbell <[email protected]>
On Mon, 13 Oct 2014, Stefano Stabellini wrote:
> On Mon, 13 Oct 2014, Will Deacon wrote:
> > On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> > > On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > > > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > > > Introduce a function to check whether a device is dma coherent.
> > > > > >
> > > > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > > > CC: [email protected]
> > > > > > CC: [email protected]
> > > > > > CC: [email protected]
> > > > > > CC: [email protected]
> > > > > > ---
> > > > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > > > 2 files changed, 11 insertions(+)
> > > > > >
> > > > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > > > index c45b61a..bededbb 100644
> > > > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > > > return 0;
> > > > > > }
> > > > > > +
> > > > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > > > +{
> > > > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > > > +}
> > > > >
> > > > > Hmm, what about the IOMMU ops?
> > > >
> > > > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > > > Do you have any better suggestions?
> > >
> > > ping?
> >
> > Without any clear idea about why this is needed or how it's used, I don't
> > have any better ideas.
>
> Thanks for the quick reply.
>
> It is used in dom0 to figure out whether the device is not coherent: in
> that case Dom0 is going to issue a hypercall and Xen is going to
> execute the required cache maintenance operations on Dom0's behalf.
>
> In practice for this use case iommu_dma_ops is not a problem, as the
> device is never going to appear as being behind an SMMU in dom0.
re-ping?
On Wed, 22 Oct 2014, Stefano Stabellini wrote:
> On Mon, 13 Oct 2014, Stefano Stabellini wrote:
> > On Mon, 13 Oct 2014, Will Deacon wrote:
> > > On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> > > > On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > > > > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > > > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > > > > Introduce a function to check whether a device is dma coherent.
> > > > > > >
> > > > > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > > > > CC: [email protected]
> > > > > > > CC: [email protected]
> > > > > > > CC: [email protected]
> > > > > > > CC: [email protected]
> > > > > > > ---
> > > > > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > > > > 2 files changed, 11 insertions(+)
> > > > > > >
> > > > > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > > > > index c45b61a..bededbb 100644
> > > > > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > > > > return 0;
> > > > > > > }
> > > > > > > +
> > > > > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > > > > +{
> > > > > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > > > > +}
> > > > > >
> > > > > > Hmm, what about the IOMMU ops?
> > > > >
> > > > > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > > > > Do you have any better suggestions?
> > > >
> > > > ping?
> > >
> > > Without any clear idea about why this is needed or how it's used, I don't
> > > have any better ideas.
> >
> > Thanks for the quick reply.
> >
> > It is used in dom0 to figure out whether the device is not coherent: in
> > that case Dom0 is going to issue an hypercall and Xen is going to
> > execute the required cache maintenance operations on Dom0's behalf.
> >
> > In practice for this use case iommu_dma_ops is not a problem, as the
> > device is never going to appear as being behind an SMMU in dom0.
>
> re-ping?
I don't mean to be pushy but this series is required to fix a regression
that causes dom0 to crash when running as dom0 on Xen on ARM if
non-coherent devices are present on the platform.
We should aim at getting it in 3.18 or we are going to release a broken
kernel.
I am thinking of reposting the series, keeping the ugly is_dma_coherent
function in a Xen-specific source file as a static helper, and then fixing
it up properly in common code later with your help?
On Thu, Oct 23, 2014 at 03:22:27PM +0100, Stefano Stabellini wrote:
> On Wed, 22 Oct 2014, Stefano Stabellini wrote:
> > On Mon, 13 Oct 2014, Stefano Stabellini wrote:
> > > On Mon, 13 Oct 2014, Will Deacon wrote:
> > > > On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> > > > > On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > > > > > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > > > > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > > > > > Introduce a function to check whether a device is dma coherent.
> > > > > > > >
> > > > > > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > > > > > CC: [email protected]
> > > > > > > > CC: [email protected]
> > > > > > > > CC: [email protected]
> > > > > > > > CC: [email protected]
> > > > > > > > ---
> > > > > > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > > > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > > > > > 2 files changed, 11 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > > > > > index c45b61a..bededbb 100644
> > > > > > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > > > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > > > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > > > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > > > > > return 0;
> > > > > > > > }
> > > > > > > > +
> > > > > > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > > > > > +{
> > > > > > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > > > > > +}
> > > > > > >
> > > > > > > Hmm, what about the IOMMU ops?
> > > > > >
> > > > > > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > > > > > Do you have any better suggestions?
> > > > >
> > > > > ping?
> > > >
> > > > Without any clear idea about why this is needed or how it's used, I don't
> > > > have any better ideas.
> > >
> > > Thanks for the quick reply.
> > >
> > > It is used in dom0 to figure out whether the device is not coherent: in
> > > that case Dom0 is going to issue a hypercall and Xen is going to
> > > execute the required cache maintenance operations on Dom0's behalf.
> > >
> > > In practice for this use case iommu_dma_ops is not a problem, as the
> > > device is never going to appear as being behind an SMMU in dom0.
> >
> > re-ping?
>
> I don't mean to be pushy but this series is required to fix a regression
> that causes dom0 to crash when running as dom0 on Xen on ARM if
> non-coherent devices are present on the platform.
Sorry for ignoring it but I don't fully understand how Xen works,
especially this DMA stuff. I recall some blogs you wrote but I don't
have time to re-read them (and I'm on holiday next week).
I think a better way would be some Xen hook around
set_arch_dma_coherent_ops(). Does Xen have its own device tracking
structures? If not, you may be able to add another bitfield to the
kernel one.
--
Catalin
On Fri, 24 Oct 2014, Catalin Marinas wrote:
> On Thu, Oct 23, 2014 at 03:22:27PM +0100, Stefano Stabellini wrote:
> > On Wed, 22 Oct 2014, Stefano Stabellini wrote:
> > > On Mon, 13 Oct 2014, Stefano Stabellini wrote:
> > > > On Mon, 13 Oct 2014, Will Deacon wrote:
> > > > > On Mon, Oct 13, 2014 at 12:16:14PM +0100, Stefano Stabellini wrote:
> > > > > > On Fri, 10 Oct 2014, Stefano Stabellini wrote:
> > > > > > > On Fri, 10 Oct 2014, Will Deacon wrote:
> > > > > > > > On Fri, Oct 10, 2014 at 12:51:44PM +0100, Stefano Stabellini wrote:
> > > > > > > > > Introduce a function to check whether a device is dma coherent.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Stefano Stabellini <[email protected]>
> > > > > > > > > CC: [email protected]
> > > > > > > > > CC: [email protected]
> > > > > > > > > CC: [email protected]
> > > > > > > > > CC: [email protected]
> > > > > > > > > ---
> > > > > > > > > arch/arm/include/asm/dma-mapping.h | 6 ++++++
> > > > > > > > > arch/arm64/include/asm/dma-mapping.h | 5 +++++
> > > > > > > > > 2 files changed, 11 insertions(+)
> > > > > > > > >
> > > > > > > > > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> > > > > > > > > index c45b61a..bededbb 100644
> > > > > > > > > --- a/arch/arm/include/asm/dma-mapping.h
> > > > > > > > > +++ b/arch/arm/include/asm/dma-mapping.h
> > > > > > > > > @@ -126,6 +126,12 @@ static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > > > > > > > set_dma_ops(dev, &arm_coherent_dma_ops);
> > > > > > > > > return 0;
> > > > > > > > > }
> > > > > > > > > +
> > > > > > > > > +static inline bool is_dma_coherent(struct device *dev)
> > > > > > > > > +{
> > > > > > > > > + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> > > > > > > > > +}
> > > > > > > >
> > > > > > > > Hmm, what about the IOMMU ops?
> > > > > > >
> > > > > > > Maybe I should check __generic_dma_ops(dev) != &arm_dma_ops?
> > > > > > > Do you have any better suggestions?
> > > > > >
> > > > > > ping?
> > > > >
> > > > > Without any clear idea about why this is needed or how it's used, I don't
> > > > > have any better ideas.
> > > >
> > > > Thanks for the quick reply.
> > > >
> > > > It is used in dom0 to figure out whether the device is not coherent: in
> > > > that case Dom0 is going to issue a hypercall and Xen is going to
> > > > execute the required cache maintenance operations on Dom0's behalf.
> > > >
> > > > In practice for this use case iommu_dma_ops is not a problem, as the
> > > > device is never going to appear as being behind an SMMU in dom0.
> > >
> > > re-ping?
> >
> > I don't mean to be pushy but this series is required to fix a regression
> > that causes dom0 to crash when running as dom0 on Xen on ARM if
> > non-coherent devices are present on the platform.
>
> Sorry for ignoring it but I don't fully understand how Xen works,
> especially this DMA stuff. I recall some blogs you wrote but I don't
> have time to re-read them (and I'm on holiday next week).
No worries.
> I think a better way would be some Xen hook around
> set_arch_dma_coherent_ops(). Does Xen have its own device tracking
> structures? If not, you may be able to add another bitfield to the
> kernel one.
We don't have an additional device tracking struct on Xen.
I agree that a new bit somewhere would be the best solution, but I am
not sure where. Maybe in dev_archdata under arm and arm64? After all it
is already used to keep pointers to dma and coherency related
structures.
However given the timing constraints I hope you would be OK with the
suboptimal solution for now and create a common is_dma_coherent function
in 3.19?
On Fri, Oct 24, 2014 at 12:39:59PM +0100, Stefano Stabellini wrote:
> On Fri, 24 Oct 2014, Catalin Marinas wrote:
> > I think a better way would be some Xen hook around
> > set_arch_dma_coherent_ops(). Does Xen have its own device tracking
> > structures? If not, you may be able to add another bitfield to the
> > kernel one.
>
> We don't have an additional device tracking struct on Xen.
> I agree that a new bit somewhere would be the best solution, but I am
> not sure where. Maybe in dev_archdata under arm and arm64? After all it
> is already used to keep pointers to dma and coherency related
> structures.
I was thinking about something like below (maybe with some additional
ARCH_HAS_NONCOHERENT_DMA config for architectures that are always
coherent):
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 3b64d0bf5bba..ae399ccbd569 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -183,6 +183,7 @@ static void of_dma_configure(struct device *dev)
* dma coherent operations.
*/
if (of_dma_is_coherent(dev->of_node)) {
+ dev->dma_coherent = 1;
set_arch_dma_coherent_ops(dev);
dev_dbg(dev, "device is dma coherent\n");
}
diff --git a/include/linux/device.h b/include/linux/device.h
index ce1f21608b16..e00ca876db01 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -796,6 +796,7 @@ struct device {
bool offline_disabled:1;
bool offline:1;
+ bool dma_coherent:1;
};
static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d5d388160f42..9c9ba5a5428e 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -78,6 +78,11 @@ static inline int is_device_dma_capable(struct device *dev)
return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
}
+static inline int is_device_dma_coherent(struct device *dev)
+{
+ return dev->dma_coherent;
+}
+
#ifdef CONFIG_HAS_DMA
#include <asm/dma-mapping.h>
#else
> However given the timing constraints I hope you would be OK with the
> suboptimal solution for now and create a common is_dma_coherent function
> in 3.19?
If you want to push something for 3.18, you could have a temporary
solution but I would prefer a bool or something in the dev_archdata
structure. Another untested patch:
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index cf98b362094b..243ef256b8c9 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -21,6 +21,7 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */
#endif
+ bool dma_coherent;
};
struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index adeae3f6f0fc..b6bc4c268878 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -54,11 +54,17 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
static inline int set_arch_dma_coherent_ops(struct device *dev)
{
+ dev->dev_archdata.dma_coherent = true;
set_dma_ops(dev, &coherent_swiotlb_dma_ops);
return 0;
}
#define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
+static inline int is_device_dma_coherent(struct device *dev)
+{
+ return dev->dev_archdata.dma_coherent;
+}
+
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
This way you don't have to test for swiotlb vs iommu ops (we don't have
the latter yet on arm64 but they are coming).
--
Catalin
On Fri, 24 Oct 2014, Catalin Marinas wrote:
> On Fri, Oct 24, 2014 at 12:39:59PM +0100, Stefano Stabellini wrote:
> > On Fri, 24 Oct 2014, Catalin Marinas wrote:
> > > I think a better way would be some Xen hook around
> > > set_arch_dma_coherent_ops(). Does Xen have its own device tracking
> > > structures? If not, you may be able to add another bitfield to the
> > > kernel one.
> >
> > We don't have an additional device tracking struct on Xen.
> > I agree that a new bit somewhere would be the best solution, but I am
> > not sure where. Maybe in dev_archdata under arm and arm64? After all it
> > is already used to keep pointers to dma and coherency related
> > structures.
>
> I was thinking about something like below (maybe with some additional
> ARCH_HAS_NONCOHERENT_DMA config for architectures that are always
> coherent):
>
> diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> index 3b64d0bf5bba..ae399ccbd569 100644
> --- a/drivers/of/platform.c
> +++ b/drivers/of/platform.c
> @@ -183,6 +183,7 @@ static void of_dma_configure(struct device *dev)
> * dma coherent operations.
> */
> if (of_dma_is_coherent(dev->of_node)) {
> + dev->dma_coherent = 1;
> set_arch_dma_coherent_ops(dev);
> dev_dbg(dev, "device is dma coherent\n");
> }
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ce1f21608b16..e00ca876db01 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -796,6 +796,7 @@ struct device {
>
> bool offline_disabled:1;
> bool offline:1;
> + bool dma_coherent:1;
> };
>
> static inline struct device *kobj_to_dev(struct kobject *kobj)
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index d5d388160f42..9c9ba5a5428e 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -78,6 +78,11 @@ static inline int is_device_dma_capable(struct device *dev)
> return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
> }
>
> +static inline int is_device_dma_coherent(struct device *dev)
> +{
> + return dev->dma_coherent;
> +}
> +
> #ifdef CONFIG_HAS_DMA
> #include <asm/dma-mapping.h>
> #else
This is probably the cleanest option. I am going to send it out and see
what the comments are.
I might still be able to request a backport if it doesn't make 3.18.
> > However given the timing constraints I hope you would be OK with the
> > suboptimal solution for now and create a common is_dma_coherent function
> > in 3.19?
>
> If you want to push something for 3.18, you could have a temporary
> solution but I would prefer a bool or something in the dev_archdata
> structure. Another untested patch:
>
> diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
> index cf98b362094b..243ef256b8c9 100644
> --- a/arch/arm64/include/asm/device.h
> +++ b/arch/arm64/include/asm/device.h
> @@ -21,6 +21,7 @@ struct dev_archdata {
> #ifdef CONFIG_IOMMU_API
> void *iommu; /* private IOMMU data */
> #endif
> + bool dma_coherent;
> };
>
> struct pdev_archdata {
> diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
> index adeae3f6f0fc..b6bc4c268878 100644
> --- a/arch/arm64/include/asm/dma-mapping.h
> +++ b/arch/arm64/include/asm/dma-mapping.h
> @@ -54,11 +54,17 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
>
> static inline int set_arch_dma_coherent_ops(struct device *dev)
> {
> + dev->archdata.dma_coherent = true;
> set_dma_ops(dev, &coherent_swiotlb_dma_ops);
> return 0;
> }
> #define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
>
> +static inline int is_device_dma_coherent(struct device *dev)
> +{
> + return dev->archdata.dma_coherent;
> +}
> +
> #include <asm-generic/dma-mapping-common.h>
>
> static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
>
>
> This way you don't have to test for swiotlb vs iommu ops (we don't have
> the latter yet on arm64 but they are coming).
>
> --
> Catalin
>
On Fri, 24 Oct 2014, Stefano Stabellini wrote:
> On Fri, 24 Oct 2014, Catalin Marinas wrote:
> > On Fri, Oct 24, 2014 at 12:39:59PM +0100, Stefano Stabellini wrote:
> > > On Fri, 24 Oct 2014, Catalin Marinas wrote:
> > > > I think a better way would be some Xen hook around
> > > > set_arch_dma_coherent_ops(). Does Xen have its own device tracking
> > > > structures? If not, you may be able to add another bitfield to the
> > > > kernel one.
> > >
> > > We don't have an additional device tracking struct on Xen.
> > > I agree that a new bit somewhere would be the best solution, but I am
> > > not sure where. Maybe in dev_archdata under arm and arm64? After all it
> > > is already used to keep pointers to dma and coherency related
> > > structures.
> >
> > I was thinking about something like below (maybe with some additional
> > ARCH_HAS_NONCOHERENT_DMA config for architectures that are always
> > coherent):
I don't think that introducing ARCH_HAS_NONCOHERENT_DMA is necessary here.
> > diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> > index 3b64d0bf5bba..ae399ccbd569 100644
> > --- a/drivers/of/platform.c
> > +++ b/drivers/of/platform.c
> > @@ -183,6 +183,7 @@ static void of_dma_configure(struct device *dev)
> > * dma coherent operations.
> > */
> > if (of_dma_is_coherent(dev->of_node)) {
> > + dev->dma_coherent = 1;
> > set_arch_dma_coherent_ops(dev);
> > dev_dbg(dev, "device is dma coherent\n");
> > }
> > diff --git a/include/linux/device.h b/include/linux/device.h
> > index ce1f21608b16..e00ca876db01 100644
> > --- a/include/linux/device.h
> > +++ b/include/linux/device.h
> > @@ -796,6 +796,7 @@ struct device {
> >
> > bool offline_disabled:1;
> > bool offline:1;
> > + bool dma_coherent:1;
> > };
> >
> > static inline struct device *kobj_to_dev(struct kobject *kobj)
> > diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> > index d5d388160f42..9c9ba5a5428e 100644
> > --- a/include/linux/dma-mapping.h
> > +++ b/include/linux/dma-mapping.h
> > @@ -78,6 +78,11 @@ static inline int is_device_dma_capable(struct device *dev)
> > return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
> > }
> >
> > +static inline int is_device_dma_coherent(struct device *dev)
> > +{
> > + return dev->dma_coherent;
> > +}
> > +
> > #ifdef CONFIG_HAS_DMA
> > #include <asm/dma-mapping.h>
> > #else
>
> This is probably the cleanest option. I am going to send it out and see
> what the comments are.
>
> I might still be able to request a backport if it doesn't make 3.18.
>
>
> > > However given the timing constraints I hope you would be OK with the
> > > suboptimal solution for now and create a common is_dma_coherent function
> > > in 3.19?
> >
> > If you want to push something for 3.18, you could have a temporary
> > solution but I would prefer a bool or something in the dev_archdata
> > structure. Another untested patch:
> >
> > diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
> > index cf98b362094b..243ef256b8c9 100644
> > --- a/arch/arm64/include/asm/device.h
> > +++ b/arch/arm64/include/asm/device.h
> > @@ -21,6 +21,7 @@ struct dev_archdata {
> > #ifdef CONFIG_IOMMU_API
> > void *iommu; /* private IOMMU data */
> > #endif
> > + bool dma_coherent;
> > };
> >
> > struct pdev_archdata {
> > diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
> > index adeae3f6f0fc..b6bc4c268878 100644
> > --- a/arch/arm64/include/asm/dma-mapping.h
> > +++ b/arch/arm64/include/asm/dma-mapping.h
> > @@ -54,11 +54,17 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
> >
> > static inline int set_arch_dma_coherent_ops(struct device *dev)
> > {
> > + dev->archdata.dma_coherent = true;
> > set_dma_ops(dev, &coherent_swiotlb_dma_ops);
> > return 0;
> > }
> > #define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
> >
> > +static inline int is_device_dma_coherent(struct device *dev)
> > +{
> > + return dev->archdata.dma_coherent;
> > +}
> > +
> > #include <asm-generic/dma-mapping-common.h>
> >
> > static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
> >
> >
> > This way you don't have to test for swiotlb vs iommu ops (we don't have
> > the latter yet on arm64 but they are coming).
> >
> > --
> > Catalin
> >
>
On Sat, 25 Oct 2014, Ian Campbell wrote:
> On Sat, 2014-10-25 at 14:29 +0100, Stefano Stabellini wrote:
> >
> > Your suggestions and looking more at the code gave me another idea, that
> > I think is clean and at the same time suitable for 3.18.
> > What do you think of the following? It is simple, self-contained and
> > doesn't need a new flag in struct device.
>
> of_dma_is_coherent looks to be quite expensive though (walks up the
> Device Tree doing strcmps on each property of each node until it finds
> the one it is looking for).
It takes spin_locks too!
Too bad, I think I'll have to ditch it. In that case I'll try the new
flag in struct device approach.
On Fri, 24 Oct 2014, Stefano Stabellini wrote:
> On Fri, 24 Oct 2014, Stefano Stabellini wrote:
> > On Fri, 24 Oct 2014, Catalin Marinas wrote:
> > > > However given the timing constraints I hope you would be OK with the
> > > > suboptimal solution for now and create a common is_dma_coherent function
> > > > in 3.19?
> > >
> > > If you want to push something for 3.18, you could have a temporary
> > > solution but I would prefer a bool or something in the dev_archdata
> > > structure. Another untested patch:
> > >
> > > diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
> > > index cf98b362094b..243ef256b8c9 100644
> > > --- a/arch/arm64/include/asm/device.h
> > > +++ b/arch/arm64/include/asm/device.h
> > > @@ -21,6 +21,7 @@ struct dev_archdata {
> > > #ifdef CONFIG_IOMMU_API
> > > void *iommu; /* private IOMMU data */
> > > #endif
> > > + bool dma_coherent;
> > > };
> > >
> > > struct pdev_archdata {
> > > diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
> > > index adeae3f6f0fc..b6bc4c268878 100644
> > > --- a/arch/arm64/include/asm/dma-mapping.h
> > > +++ b/arch/arm64/include/asm/dma-mapping.h
> > > @@ -54,11 +54,17 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
> > >
> > > static inline int set_arch_dma_coherent_ops(struct device *dev)
> > > {
> > > + dev->archdata.dma_coherent = true;
> > > set_dma_ops(dev, &coherent_swiotlb_dma_ops);
> > > return 0;
> > > }
> > > #define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
> > >
> > > +static inline int is_device_dma_coherent(struct device *dev)
> > > +{
> > > + return dev->archdata.dma_coherent;
> > > +}
> > > +
> > > #include <asm-generic/dma-mapping-common.h>
> > >
> > > static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
> > >
> > >
> > > This way you don't have to test for swiotlb vs iommu ops (we don't have
> > > the latter yet on arm64 but they are coming).
Your suggestions and looking more at the code gave me another idea, that
I think is clean and at the same time suitable for 3.18.
What do you think of the following? It is simple, self-contained and
doesn't need a new flag in struct device.
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index 6153d61..2b259f1 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -2,10 +2,16 @@
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
+#include <linux/of_address.h>
#include <xen/features.h>
+static inline bool is_dma_coherent(struct device *dev)
+{
+ return of_dma_is_coherent(dev->of_node);
+}
+
/* functions called by SWIOTLB */
static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
On Sat, 2014-10-25 at 14:29 +0100, Stefano Stabellini wrote:
>
> Your suggestions and looking more at the code gave me another idea, that
> I think is clean and at the same time suitable for 3.18.
> What do you think of the following? It is simple, self-contained and
> doesn't need a new flag in struct device.
of_dma_is_coherent looks to be quite expensive though (walks up the
Device Tree doing strcmps on each property of each node until it finds
the one it is looking for).
Ian.
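For reference, of_dma_is_coherent() at this point is roughly the following; treat it as an approximation of the helper's shape rather than an exact copy. It walks from the device's node up through its DT parents, testing each one for a "dma-coherent" property, which is what makes calling it on every DMA operation unattractive.

/* Approximate shape of of_dma_is_coherent() (paraphrased):
 * walk the node and its parents looking for "dma-coherent". */
#include <linux/of.h>

bool of_dma_is_coherent_sketch(struct device_node *np)
{
        struct device_node *node = of_node_get(np);

        while (node) {
                if (of_property_read_bool(node, "dma-coherent")) {
                        of_node_put(node);
                        return true;
                }
                /* of_get_next_parent() drops the reference on node */
                node = of_get_next_parent(node);
        }
        of_node_put(node);
        return false;
}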
On 25/10/14 17:15, Stefano Stabellini wrote:
> On Sat, 25 Oct 2014, Ian Campbell wrote:
>> On Sat, 2014-10-25 at 14:29 +0100, Stefano Stabellini wrote:
>>>
>>> Your suggestions and looking more at the code gave me another idea, that
>>> I think is clean and at the same time suitable for 3.18.
>>> What do you think of the following? It is simple, self-contained and
>>> doesn't need a new flag in struct device.
>>
>> of_dma_is_coherent looks to be quite expensive though (walks up the
>> Device Tree doing strcmps on each property of each node until it finds
>> the one it is looking for).
>
> It takes spin_locks too!
> Too bad, I think I'll have to ditch it. In that case I'll try the new
> flag in struct device approach.
If you're having to make changes to struct device, this is looking like
a series for 3.19 (not 3.18).
David
On Mon, 27 Oct 2014, David Vrabel wrote:
> On 25/10/14 17:15, Stefano Stabellini wrote:
> > On Sat, 25 Oct 2014, Ian Campbell wrote:
> >> On Sat, 2014-10-25 at 14:29 +0100, Stefano Stabellini wrote:
> >>>
> >>> Your suggestions and looking more at the code gave me another idea, that
> >>> I think is clean and at the same time suitable for 3.18.
> >>> What do you think of the following? It is simple, self-contained and
> >>> doesn't need a new flag in struct device.
> >>
> >> of_dma_is_coherent looks to be quite expensive though (walks up the
> >> Device Tree doing strcmps on each property of each node until it finds
> >> the one it is looking for).
> >
> > It takes spin_locks too!
> > Too bad, I think I'll have to ditch it. In that case I'll try the new
> > flag in struct device approach.
>
> If you're having to make changes to struct device, this is looking like
> a series for 3.19 (not 3.18).
After looking more into it, I went for adding the flag to dev_archdata
under arm and arm64. Let's see what the maintainers say.
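To make the intended use concrete, here is an illustrative sketch (not a posted patch) of how the new per-device flag could be consumed in the Xen dev-to-cpu path quoted earlier: a coherent device skips cache maintenance entirely, while a non-coherent one keeps the existing dma_cache_maint() call. The _sketch suffix only marks this as an example; the surrounding names follow the mm32.c code quoted above.

/* Sketch only: consume is_device_dma_coherent() in the Xen
 * dev-to-cpu path (names follow the mm32.c code quoted earlier). */
static void xen_dma_page_dev_to_cpu_sketch(struct device *hwdev,
                dma_addr_t handle, size_t size,
                enum dma_data_direction dir)
{
        /* Coherent devices never need CPU cache maintenance. */
        if (is_device_dma_coherent(hwdev))
                return;

        /* Non-coherent device: invalidate/clean the CPU caches for
         * the foreign page, as the existing code does. */
        dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK,
                        size, dir, dmac_unmap_area);
}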