2014-10-23 17:31:35

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 0/6] introduce GNTTABOP_cache_flush

Hi all,
this patch series introduces support for GNTTABOP_cache_flush to perform
cache maintenance operation on foreign pages and reverts the current
code based on XENFEAT_grant_map_identity.

It also provides a very slow fallback by bouncing on the swiotlb buffer,
in case the hypercall is not available.

v5 introduces is_dma_coherent as a Xen specific utility function named
xen_is_dma_coherent. However in the future we are going to re-work it as
a proper common arch/arm (and arch/arm64) function.



Changes in v5:
- introduce xen_is_dma_coherent as a xen specific function;
- call xen_is_dma_coherent instead of is_dma_coherent;
- introduce xen_is_dma_coherent to
arch/arm64/include/asm/xen/page-coherent.h;
- do not remove arch/arm64/include/asm/xen/page-coherent.h, add
the missing dma_ops calls from it;
- fix indentation;
- rename hypercall_flush to hypercall_cflush;
- remove spurious change.

Changes in v4:
- remove outer_*_range call;
- introduce is_dma_coherent;
- use is_dma_coherent in arch/arm/xen/mm32.c;
- merge xen/mm32.c into xen/mm.c;
- xen/arm/arm64: introduce xen_arch_need_swiotlb;
- avoid bouncing dma map operations that involve foreign grants and
non-coherent devices if GNTTABOP_cache_flush is provided by Xen.

Changes in v3:
- fix the cache maintenance op call to match what Linux does natively;
- update the hypercall interface to match Xen.

Changes in v2:
- remove the definition of XENFEAT_grant_map_identity;
- update the hypercall interface to match Xen;
- call the interface on a single page at a time.


Stefano Stabellini (6):
xen/arm: remove handling of XENFEAT_grant_map_identity
xen/arm: remove outer_*_range call
xen/arm: introduce xen_is_dma_coherent
xen/arm/arm64: merge xen/mm32.c into xen/mm.c
xen/arm/arm64: introduce xen_arch_need_swiotlb
xen/arm: introduce GNTTABOP_cache_flush

arch/arm/include/asm/xen/page-coherent.h | 5 +
arch/arm/include/asm/xen/page.h | 4 +
arch/arm/xen/Makefile | 2 +-
arch/arm/xen/enlighten.c | 5 -
arch/arm/xen/mm.c | 160 ++++++++++++++++++++++
arch/arm/xen/mm32.c | 202 ----------------------------
arch/arm64/include/asm/xen/page-coherent.h | 25 ++--
arch/x86/include/asm/xen/page.h | 7 +
drivers/xen/swiotlb-xen.c | 5 +-
include/xen/interface/features.h | 3 -
include/xen/interface/grant_table.h | 19 +++
11 files changed, 213 insertions(+), 224 deletions(-)
delete mode 100644 arch/arm/xen/mm32.c


2014-10-23 17:35:08

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 3/6] xen/arm: introduce xen_is_dma_coherent

Introduce a utility function to check whether a device is dma coherent.
The current implementation is suboptimal. Let's rework it later in
common arch/arm code.

Use it to figure out whether we need to issue cache maintenance
operations instead of checking on the existence of a particular dma_ops
function for the device.

Signed-off-by: Stefano Stabellini <[email protected]>

---

Changes in v5:
- introduce xen_is_dma_coherent as a xen specific function;
- call xen_is_dma_coherent instead of is_dma_coherent.
---
arch/arm/include/asm/xen/page-coherent.h | 5 +++++
arch/arm/xen/mm32.c | 6 +++---
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index e8275ea..6f0f882 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -5,6 +5,11 @@
#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>

+static inline bool xen_is_dma_coherent(struct device *dev)
+{
+ return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
+}
+
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index 6153d61..6c0224e 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -76,7 +76,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
struct dma_attrs *attrs)

{
- if (!__generic_dma_ops(hwdev)->unmap_page)
+ if (xen_is_dma_coherent(hwdev))
return;
if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
return;
@@ -87,7 +87,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
- if (!__generic_dma_ops(hwdev)->sync_single_for_cpu)
+ if (xen_is_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
@@ -95,7 +95,7 @@ void xen_dma_sync_single_for_cpu(struct device *hwdev,
void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
- if (!__generic_dma_ops(hwdev)->sync_single_for_device)
+ if (xen_is_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}
--
1.7.10.4

2014-10-23 17:35:06

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 2/6] xen/arm: remove outer_*_range call

Dom0 is not actually capable of issuing outer_inv_range or
outer_clean_range calls.

Signed-off-by: Stefano Stabellini <[email protected]>
---
arch/arm/xen/mm32.c | 9 ---------
1 file changed, 9 deletions(-)

diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index a5a93fc..6153d61 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -61,9 +61,6 @@ static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
/* Cannot use __dma_page_dev_to_cpu because we don't have a
* struct page for handle */

- if (dir != DMA_TO_DEVICE)
- outer_inv_range(handle, handle + size);
-
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
}

@@ -72,12 +69,6 @@ static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
{

dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
-
- if (dir == DMA_FROM_DEVICE) {
- outer_inv_range(handle, handle + size);
- } else {
- outer_clean_range(handle, handle + size);
- }
}

void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
--
1.7.10.4

2014-10-23 17:35:04

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 6/6] xen/arm: introduce GNTTABOP_cache_flush

Introduce support for new hypercall GNTTABOP_cache_flush.
Use it to perform cache flushing on pages used for dma when necessary.

If GNTTABOP_cache_flush is supported by the hypervisor, we don't need to
bounce dma map operations that involve foreign grants and non-coherent
devices.

Signed-off-by: Stefano Stabellini <[email protected]>
Acked-by: Ian Campbell <[email protected]>

---

Changes in v5:
- rename hypercall_flush to hypercall_cflush;
- remove spurious change.

Changes in v4:
- add comment;
- avoid bouncing dma map operations that involve foreign grants and
non-coherent devices if GNTTABOP_cache_flush is provided by Xen.

Changes in v3:
- fix the cache maintenance op call to match what Linux does natively;
- update the hypercall interface to match Xen.

Changes in v2:
- update the hypercall interface to match Xen;
- call the interface on a single page at a time.
---
arch/arm/xen/mm.c | 39 ++++++++++++++++++++++++++++++-----
include/xen/interface/grant_table.h | 19 +++++++++++++++++
2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 728c245..26ad47c 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -11,6 +11,7 @@
#include <linux/swiotlb.h>

#include <xen/xen.h>
+#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/swiotlb-xen.h>

@@ -45,6 +46,8 @@ static inline void *kmap_high_get(struct page *page)
static inline void kunmap_high(struct page *page) {}
#endif

+static bool hypercall_cflush = false;
+

/* functions called by SWIOTLB */

@@ -61,17 +64,35 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
do {
size_t len = left;
void *vaddr;
+
+ /* buffers in highmem or foreign pages cannot cross page
+ * boundaries */
+ if (len + offset > PAGE_SIZE)
+ len = PAGE_SIZE - offset;

if (!pfn_valid(pfn))
{
- /* TODO: cache flush */
+ struct gnttab_cache_flush cflush;
+
+ cflush.op = 0;
+ cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
+ cflush.offset = offset;
+ cflush.length = len;
+
+ if (op == dmac_unmap_area && dir != DMA_TO_DEVICE)
+ cflush.op = GNTTAB_CACHE_INVAL;
+ if (op == dmac_map_area) {
+ if (dir == DMA_FROM_DEVICE)
+ cflush.op = GNTTAB_CACHE_INVAL;
+ else
+ cflush.op = GNTTAB_CACHE_CLEAN;
+ }
+ if (cflush.op)
+ HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
} else {
struct page *page = pfn_to_page(pfn);

if (PageHighMem(page)) {
- if (len + offset > PAGE_SIZE)
- len = PAGE_SIZE - offset;
-
if (cache_is_vipt_nonaliasing()) {
vaddr = kmap_atomic(page);
op(vaddr + offset, len, dir);
@@ -144,7 +165,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn)
{
- return ((pfn != mfn) && !xen_is_dma_coherent(dev));
+ return (!hypercall_cflush && (pfn != mfn) && !xen_is_dma_coherent(dev));
}

int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
@@ -187,10 +208,18 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {

int __init xen_mm_init(void)
{
+ struct gnttab_cache_flush cflush;
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
xen_dma_ops = &xen_swiotlb_dma_ops;
+
+ cflush.op = 0;
+ cflush.a.dev_bus_addr = 0;
+ cflush.offset = 0;
+ cflush.length = 0;
+ if (HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1) != -ENOSYS)
+ hypercall_cflush = true;
return 0;
}
arch_initcall(xen_mm_init);
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index e40fae9..bcce564 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -479,6 +479,25 @@ struct gnttab_get_version {
DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);

/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+#define GNTTABOP_cache_flush 12
+struct gnttab_cache_flush {
+ union {
+ uint64_t dev_bus_addr;
+ grant_ref_t ref;
+ } a;
+ uint16_t offset; /* offset from start of grant */
+ uint16_t length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN (1<<0)
+#define GNTTAB_CACHE_INVAL (1<<1)
+#define GNTTAB_CACHE_SOURCE_GREF (1<<31)
+ uint32_t op;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
+
+/*
* Bitfield values for update_pin_status.flags.
*/
/* Map the grant entry for access by I/O devices. */
--
1.7.10.4

2014-10-23 17:35:53

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 5/6] xen/arm/arm64: introduce xen_arch_need_swiotlb

Introduce an arch specific function to find out whether a particular dma
mapping operation needs to bounce on the swiotlb buffer.

On ARM and ARM64, if the page involved is a foreign page and the device
is not coherent, we need to bounce because at unmap time we cannot
execute any required cache maintenance operations (we don't know how to
find the pfn from the mfn).

No change of behaviour for x86.

Signed-off-by: Stefano Stabellini <[email protected]>

---

Changes in v5:
- fix indentation.
---
arch/arm/include/asm/xen/page.h | 4 ++++
arch/arm/xen/mm.c | 7 +++++++
arch/x86/include/asm/xen/page.h | 7 +++++++
drivers/xen/swiotlb-xen.c | 5 ++++-
4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 135c24a..fb7839d 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
#define xen_unmap(cookie) iounmap((cookie))

+bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn);
+
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 659e1c3..728c245 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -140,6 +140,13 @@ void xen_dma_sync_single_for_device(struct device *hwdev,
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}

+bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn)
+{
+ return ((pfn != mfn) && !xen_is_dma_coherent(dev));
+}
+
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c949923..800b6a9 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_remap(cookie, size) ioremap((cookie), (size));
#define xen_unmap(cookie) iounmap((cookie))

+static inline bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn)
+{
+ return false;
+}
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ebd8f21..ac0bd60 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -399,7 +399,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* buffering it.
*/
if (dma_capable(dev, dev_addr, size) &&
- !range_straddles_page_boundary(phys, size) && !swiotlb_force) {
+ !range_straddles_page_boundary(phys, size) &&
+ !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) &&
+ !swiotlb_force) {
/* we are not interested in the dma_addr returned by
* xen_dma_map_page, only in the potential cache flushes executed
* by the function. */
@@ -557,6 +559,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
dma_addr_t dev_addr = xen_phys_to_bus(paddr);

if (swiotlb_force ||
+ xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) ||
!dma_capable(hwdev, dev_addr, sg->length) ||
range_straddles_page_boundary(paddr, sg->length)) {
phys_addr_t map = swiotlb_tbl_map_single(hwdev,
--
1.7.10.4

2014-10-23 17:36:11

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 4/6] xen/arm/arm64: merge xen/mm32.c into xen/mm.c

Merge xen/mm32.c into xen/mm.c.
As a consequence the code gets compiled on arm64 too: introduce a few
compat functions to actually be able to compile it.
Introduce xen_is_dma_coherent on arm64 and call into the dma_ops
functions from the xen_dma wrappers in page-coherent.h to handle
non-coherent devices.

Signed-off-by: Stefano Stabellini <[email protected]>

---

Changes in v5:
- introduce xen_is_dma_coherent to
arch/arm64/include/asm/xen/page-coherent.h;
- do not remove arch/arm64/include/asm/xen/page-coherent.h, add
the missing dma_ops calls from it.
---
arch/arm/xen/Makefile | 2 +-
arch/arm/xen/mm.c | 124 ++++++++++++++++++++++++++++
arch/arm/xen/mm32.c | 110 ------------------------
arch/arm64/include/asm/xen/page-coherent.h | 25 +++---
4 files changed, 138 insertions(+), 123 deletions(-)
delete mode 100644 arch/arm/xen/mm32.c

diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile
index 1f85bfe..1296952 100644
--- a/arch/arm/xen/Makefile
+++ b/arch/arm/xen/Makefile
@@ -1 +1 @@
-obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o mm32.o
+obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index b0e77de..659e1c3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,5 +1,8 @@
+#include <linux/cpu.h>
+#include <linux/dma-mapping.h>
#include <linux/bootmem.h>
#include <linux/gfp.h>
+#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -13,9 +16,130 @@

#include <asm/cacheflush.h>
#include <asm/xen/page.h>
+#include <asm/xen/page-coherent.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>

+
+#ifdef CONFIG_ARM64
+static inline void dmac_map_area(const void *start, size_t size, int dir)
+{
+ return __dma_map_area(start, size, dir);
+}
+
+static inline void dmac_unmap_area(const void *start, size_t size, int dir)
+{
+ return __dma_unmap_area(start, size, dir);
+}
+
+static inline bool cache_is_vipt_nonaliasing(void)
+{
+ return true;
+}
+
+static inline void *kmap_high_get(struct page *page)
+{
+ return NULL;
+}
+
+static inline void kunmap_high(struct page *page) {}
+#endif
+
+
+/* functions called by SWIOTLB */
+
+static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
+ size_t size, enum dma_data_direction dir,
+ void (*op)(const void *, size_t, int))
+{
+ unsigned long pfn;
+ size_t left = size;
+
+ pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+
+ do {
+ size_t len = left;
+ void *vaddr;
+
+ if (!pfn_valid(pfn))
+ {
+ /* TODO: cache flush */
+ } else {
+ struct page *page = pfn_to_page(pfn);
+
+ if (PageHighMem(page)) {
+ if (len + offset > PAGE_SIZE)
+ len = PAGE_SIZE - offset;
+
+ if (cache_is_vipt_nonaliasing()) {
+ vaddr = kmap_atomic(page);
+ op(vaddr + offset, len, dir);
+ kunmap_atomic(vaddr);
+ } else {
+ vaddr = kmap_high_get(page);
+ if (vaddr) {
+ op(vaddr + offset, len, dir);
+ kunmap_high(page);
+ }
+ }
+ } else {
+ vaddr = page_address(page) + offset;
+ op(vaddr, len, dir);
+ }
+ }
+
+ offset = 0;
+ pfn++;
+ left -= len;
+ } while (left);
+}
+
+static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ /* Cannot use __dma_page_dev_to_cpu because we don't have a
+ * struct page for handle */
+
+ dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
+}
+
+static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+
+ dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
+}
+
+void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+{
+ if (xen_is_dma_coherent(hwdev))
+ return;
+ if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ return;
+
+ __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+ if (xen_is_dma_coherent(hwdev))
+ return;
+ __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_device(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+ if (xen_is_dma_coherent(hwdev))
+ return;
+ __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
+}
+
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
deleted file mode 100644
index 6c0224e..0000000
--- a/arch/arm/xen/mm32.c
+++ /dev/null
@@ -1,110 +0,0 @@
-#include <linux/cpu.h>
-#include <linux/dma-mapping.h>
-#include <linux/gfp.h>
-#include <linux/highmem.h>
-
-#include <xen/features.h>
-
-
-/* functions called by SWIOTLB */
-
-static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
- size_t size, enum dma_data_direction dir,
- void (*op)(const void *, size_t, int))
-{
- unsigned long pfn;
- size_t left = size;
-
- pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
- offset %= PAGE_SIZE;
-
- do {
- size_t len = left;
- void *vaddr;
-
- if (!pfn_valid(pfn))
- {
- /* TODO: cache flush */
- } else {
- struct page *page = pfn_to_page(pfn);
-
- if (PageHighMem(page)) {
- if (len + offset > PAGE_SIZE)
- len = PAGE_SIZE - offset;
-
- if (cache_is_vipt_nonaliasing()) {
- vaddr = kmap_atomic(page);
- op(vaddr + offset, len, dir);
- kunmap_atomic(vaddr);
- } else {
- vaddr = kmap_high_get(page);
- if (vaddr) {
- op(vaddr + offset, len, dir);
- kunmap_high(page);
- }
- }
- } else {
- vaddr = page_address(page) + offset;
- op(vaddr, len, dir);
- }
- }
-
- offset = 0;
- pfn++;
- left -= len;
- } while (left);
-}
-
-static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
- /* Cannot use __dma_page_dev_to_cpu because we don't have a
- * struct page for handle */
-
- dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
-}
-
-static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
-
- dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
-}
-
-void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
-
-{
- if (xen_is_dma_coherent(hwdev))
- return;
- if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
- return;
-
- __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- if (xen_is_dma_coherent(hwdev))
- return;
- __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- if (xen_is_dma_coherent(hwdev))
- return;
- __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
-}
-
-int __init xen_mm32_init(void)
-{
- if (!xen_initial_domain())
- return 0;
-
- return 0;
-}
-arch_initcall(xen_mm32_init);
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
index dde3fc9..c34132b 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -5,6 +5,11 @@
#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>

+static inline bool xen_is_dma_coherent(struct device *dev)
+{
+ return (__generic_dma_ops(dev) == &coherent_swiotlb_dma_ops);
+}
+
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
@@ -23,21 +28,17 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
+ __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
}

-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
-{
-}
+ struct dma_attrs *attrs);

-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-}
+void xen_dma_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir);
+
+void xen_dma_sync_single_for_device(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir);

-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-}
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
--
1.7.10.4

2014-10-23 17:36:34

by Stefano Stabellini

[permalink] [raw]
Subject: [PATCH v5 1/6] xen/arm: remove handling of XENFEAT_grant_map_identity

The feature has been removed from Xen. Also Linux cannot use it on ARM32
without CONFIG_ARM_LPAE.

Signed-off-by: Stefano Stabellini <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
Acked-by: Ian Campbell <[email protected]>

---
Changes in v2:

- remove the definition of XENFEAT_grant_map_identity.
---
arch/arm/xen/enlighten.c | 5 ---
arch/arm/xen/mm32.c | 85 +-------------------------------------
include/xen/interface/features.h | 3 --
3 files changed, 1 insertion(+), 92 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 0e15f01..c7ca936 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -261,11 +261,6 @@ static int __init xen_guest_init(void)

xen_setup_features();

- if (!xen_feature(XENFEAT_grant_map_identity)) {
- pr_warn("Please upgrade your Xen.\n"
- "If your platform has any non-coherent DMA devices, they won't work properly.\n");
- }
-
if (xen_feature(XENFEAT_dom0))
xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
else
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
index 3b99860..a5a93fc 100644
--- a/arch/arm/xen/mm32.c
+++ b/arch/arm/xen/mm32.c
@@ -5,70 +5,6 @@

#include <xen/features.h>

-static DEFINE_PER_CPU(unsigned long, xen_mm32_scratch_virt);
-static DEFINE_PER_CPU(pte_t *, xen_mm32_scratch_ptep);
-
-static int alloc_xen_mm32_scratch_page(int cpu)
-{
- struct page *page;
- unsigned long virt;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (per_cpu(xen_mm32_scratch_ptep, cpu) != NULL)
- return 0;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL) {
- pr_warn("Failed to allocate xen_mm32_scratch_page for cpu %d\n", cpu);
- return -ENOMEM;
- }
-
- virt = (unsigned long)__va(page_to_phys(page));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
- ptep = pte_offset_kernel(pmdp, virt);
-
- per_cpu(xen_mm32_scratch_virt, cpu) = virt;
- per_cpu(xen_mm32_scratch_ptep, cpu) = ptep;
-
- return 0;
-}
-
-static int xen_mm32_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
- switch (action) {
- case CPU_UP_PREPARE:
- if (alloc_xen_mm32_scratch_page(cpu))
- return NOTIFY_BAD;
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block xen_mm32_cpu_notifier = {
- .notifier_call = xen_mm32_cpu_notify,
-};
-
-static void* xen_mm32_remap_page(dma_addr_t handle)
-{
- unsigned long virt = get_cpu_var(xen_mm32_scratch_virt);
- pte_t *ptep = __get_cpu_var(xen_mm32_scratch_ptep);
-
- *ptep = pfn_pte(handle >> PAGE_SHIFT, PAGE_KERNEL);
- local_flush_tlb_kernel_page(virt);
-
- return (void*)virt;
-}
-
-static void xen_mm32_unmap(void *vaddr)
-{
- put_cpu_var(xen_mm32_scratch_virt);
-}
-

/* functions called by SWIOTLB */

@@ -88,13 +24,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,

if (!pfn_valid(pfn))
{
- /* Cannot map the page, we don't know its physical address.
- * Return and hope for the best */
- if (!xen_feature(XENFEAT_grant_map_identity))
- return;
- vaddr = xen_mm32_remap_page(handle) + offset;
- op(vaddr, len, dir);
- xen_mm32_unmap(vaddr - offset);
+ /* TODO: cache flush */
} else {
struct page *page = pfn_to_page(pfn);

@@ -181,22 +111,9 @@ void xen_dma_sync_single_for_device(struct device *hwdev,

int __init xen_mm32_init(void)
{
- int cpu;
-
if (!xen_initial_domain())
return 0;

- register_cpu_notifier(&xen_mm32_cpu_notifier);
- get_online_cpus();
- for_each_online_cpu(cpu) {
- if (alloc_xen_mm32_scratch_page(cpu)) {
- put_online_cpus();
- unregister_cpu_notifier(&xen_mm32_cpu_notifier);
- return -ENOMEM;
- }
- }
- put_online_cpus();
-
return 0;
}
arch_initcall(xen_mm32_init);
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 14334d0..131a6cc 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -53,9 +53,6 @@
/* operation as Dom0 is supported */
#define XENFEAT_dom0 11

-/* Xen also maps grant references at pfn = mfn */
-#define XENFEAT_grant_map_identity 12
-
#define XENFEAT_NR_SUBMAPS 1

#endif /* __XEN_PUBLIC_FEATURES_H__ */
--
1.7.10.4

2014-10-24 09:23:17

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH v5 2/6] xen/arm: remove outer_*_range call

On Thu, 2014-10-23 at 18:27 +0100, Stefano Stabellini wrote:
> Dom0 is not actually capable of issuing outer_inv_range or
> outer_clean_range calls.
>
> Signed-off-by: Stefano Stabellini <[email protected]>

Acked-by: Ian Campbell <[email protected]>

2014-10-24 09:25:31

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH v5 3/6] xen/arm: introduce xen_is_dma_coherent

On Thu, 2014-10-23 at 18:27 +0100, Stefano Stabellini wrote:
> Introduce a utility function to check whether a device is dma coherent.
> The current implementation is suboptimal. Let's rework it later in
> common arch/arm code.
>
> Use it to figure out whether we need to issue cache maintenance
> operations instead of checking on the existence of a particular dma_ops
> function for the device.
>
> Signed-off-by: Stefano Stabellini <[email protected]>

As well as refactoring this also changes the nature of the test, making
it tighter AFAICT. Perhaps it is the same because you happen to know
that arm_coherent_dma_ops is the only possible one which doesn't set the
hooks you were checking before.

Either way this should be mentioned in the commit log. With that done:

Acked-by: Ian Campbell <[email protected]>

>
> ---
>
> Changes in v5:
> - introduce xen_is_dma_coherent as a xen specific function;
> - call xen_is_dma_coherent instead of is_dma_coherent.
> ---
> arch/arm/include/asm/xen/page-coherent.h | 5 +++++
> arch/arm/xen/mm32.c | 6 +++---
> 2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
> index e8275ea..6f0f882 100644
> --- a/arch/arm/include/asm/xen/page-coherent.h
> +++ b/arch/arm/include/asm/xen/page-coherent.h
> @@ -5,6 +5,11 @@
> #include <linux/dma-attrs.h>
> #include <linux/dma-mapping.h>
>
> +static inline bool xen_is_dma_coherent(struct device *dev)
> +{
> + return (__generic_dma_ops(dev) == &arm_coherent_dma_ops);
> +}
> +
> static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
> dma_addr_t *dma_handle, gfp_t flags,
> struct dma_attrs *attrs)
> diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
> index 6153d61..6c0224e 100644
> --- a/arch/arm/xen/mm32.c
> +++ b/arch/arm/xen/mm32.c
> @@ -76,7 +76,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
> struct dma_attrs *attrs)
>
> {
> - if (!__generic_dma_ops(hwdev)->unmap_page)
> + if (xen_is_dma_coherent(hwdev))
> return;
> if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> return;
> @@ -87,7 +87,7 @@ void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
> void xen_dma_sync_single_for_cpu(struct device *hwdev,
> dma_addr_t handle, size_t size, enum dma_data_direction dir)
> {
> - if (!__generic_dma_ops(hwdev)->sync_single_for_cpu)
> + if (xen_is_dma_coherent(hwdev))
> return;
> __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
> }
> @@ -95,7 +95,7 @@ void xen_dma_sync_single_for_cpu(struct device *hwdev,
> void xen_dma_sync_single_for_device(struct device *hwdev,
> dma_addr_t handle, size_t size, enum dma_data_direction dir)
> {
> - if (!__generic_dma_ops(hwdev)->sync_single_for_device)
> + if (xen_is_dma_coherent(hwdev))
> return;
> __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
> }

2014-10-24 09:28:10

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH v5 4/6] xen/arm/arm64: merge xen/mm32.c into xen/mm.c

On Thu, 2014-10-23 at 18:27 +0100, Stefano Stabellini wrote:
> Merge xen/mm32.c into xen/mm.c.
> As a consequence the code gets compiled on arm64 too: introduce a few
> compat functions to actually be able to compile it.
> Introduce xen_is_dma_coherent on arm64 and call into the dma_ops
> functions from the xen_dma wrappers in page-coherent.h to handle
> non-coherent devices.
>
> Signed-off-by: Stefano Stabellini <[email protected]>

Acked-by: Ian Campbell <[email protected]>

2014-10-24 09:30:15

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH v5 5/6] xen/arm/arm64: introduce xen_arch_need_swiotlb

On Thu, 2014-10-23 at 18:27 +0100, Stefano Stabellini wrote:
> Introduce an arch specific function to find out whether a particular dma
> mapping operation needs to bounce on the swiotlb buffer.
>
> On ARM and ARM64, if the page involved is a foreign page and the device
> is not coherent, we need to bounce because at unmap time we cannot
> execute any required cache maintenance operations (we don't know how to
> find the pfn from the mfn).
>
> No change of behaviour for x86.
>
> Signed-off-by: Stefano Stabellini <[email protected]>

Your tab stops are messed up on almost every new line you added. Looks
like you have ts=4 or something.

The actual changes look ok though, so with the coding style fixed you
can add:
Acked-by: Ian Campbell <[email protected]>

>
> ---
>
> Changes in v5:
> - fix indentation.
> ---
> arch/arm/include/asm/xen/page.h | 4 ++++
> arch/arm/xen/mm.c | 7 +++++++
> arch/x86/include/asm/xen/page.h | 7 +++++++
> drivers/xen/swiotlb-xen.c | 5 ++++-
> 4 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
> index 135c24a..fb7839d 100644
> --- a/arch/arm/include/asm/xen/page.h
> +++ b/arch/arm/include/asm/xen/page.h
> @@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> #define xen_remap(cookie, size) ioremap_cache((cookie), (size))
> #define xen_unmap(cookie) iounmap((cookie))
>
> +bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn);
> +
> #endif /* _ASM_ARM_XEN_PAGE_H */
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 659e1c3..728c245 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -140,6 +140,13 @@ void xen_dma_sync_single_for_device(struct device *hwdev,
> __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
> }
>
> +bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn)
> +{
> + return ((pfn != mfn) && !xen_is_dma_coherent(dev));
> +}
> +
> int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
> unsigned int address_bits,
> dma_addr_t *dma_handle)
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index c949923..800b6a9 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
> #define xen_remap(cookie, size) ioremap((cookie), (size));
> #define xen_unmap(cookie) iounmap((cookie))
>
> +static inline bool xen_arch_need_swiotlb(struct device *dev,
> + unsigned long pfn,
> + unsigned long mfn)
> +{
> + return false;
> +}
> +
> #endif /* _ASM_X86_XEN_PAGE_H */
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index ebd8f21..ac0bd60 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -399,7 +399,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
> * buffering it.
> */
> if (dma_capable(dev, dev_addr, size) &&
> - !range_straddles_page_boundary(phys, size) && !swiotlb_force) {
> + !range_straddles_page_boundary(phys, size) &&
> + !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) &&
> + !swiotlb_force) {
> /* we are not interested in the dma_addr returned by
> * xen_dma_map_page, only in the potential cache flushes executed
> * by the function. */
> @@ -557,6 +559,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
> dma_addr_t dev_addr = xen_phys_to_bus(paddr);
>
> if (swiotlb_force ||
> + xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) ||
> !dma_capable(hwdev, dev_addr, sg->length) ||
> range_straddles_page_boundary(paddr, sg->length)) {
> phys_addr_t map = swiotlb_tbl_map_single(hwdev,