Hi Ingo,
Here are some patches to clean up swiotlb to prepare for some Xen dom0
patches. These have been posted before, but have undergone a round of
cleanups to address various comments.
Thanks,
J
The last usage was removed by the patch set culminating in
commit e3c449f526cebb8d287241c7e82faafd9709668b
Author: Joerg Roedel <[email protected]>
Date: Wed Oct 15 22:02:11 2008 -0700
x86, AMD IOMMU: convert driver to generic iommu_num_pages function
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected] -r 9b89e3b4ca90 arch/x86/include/asm/iommu.h
---
arch/x86/include/asm/iommu.h | 2 --
arch/x86/kernel/pci-dma.c | 7 -------
2 files changed, 9 deletions(-)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -7,8 +7,6 @@
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
-
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -121,13 +121,6 @@
pci_swiotlb_init();
}
-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
-{
- unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
- return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_nr_pages);
#endif
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
Architectures may need to allocate memory specially for use with
the swiotlb. Create weak functions swiotlb_alloc_boot() and
swiotlb_alloc(), defaulting to the current behaviour.
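For illustration only (not part of this patch), an architecture hooks in
simply by providing strong definitions of the same functions; the bodies
below just mirror the defaults, and a real override would substitute its
own allocator:
void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
{
	/* boot-time pool; an override could place this wherever the
	 * architecture needs its bounce buffers to live */
	return alloc_bootmem_low_pages(size);
}
void *swiotlb_alloc(unsigned order, unsigned long nslabs)
{
	/* pool for swiotlb_late_init */
	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}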
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Ian Campbell <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Jan Beulich <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: FUJITA Tomonori <[email protected]>
---
include/linux/swiotlb.h | 3 +++
lib/swiotlb.c | 16 +++++++++++++---
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -10,6 +10,9 @@
extern void
swiotlb_init(void);
+extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
+extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
+
extern void
*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -21,6 +21,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
+#include <linux/swiotlb.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>
@@ -126,6 +127,16 @@
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
+void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+ return alloc_bootmem_low_pages(size);
+}
+
+void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+ return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -145,7 +156,7 @@
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = alloc_bootmem_low_pages(bytes);
+ io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
if (!io_tlb_start)
panic("Cannot allocate SWIOTLB buffer");
io_tlb_end = io_tlb_start + bytes;
@@ -202,8 +213,7 @@
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
- order);
+ io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
if (io_tlb_start)
break;
order--;
From: Ian Campbell <[email protected]>
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
include/linux/swiotlb.h | 14 ++++++++++++++
lib/swiotlb.c | 14 +-------------
2 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,20 @@
struct dma_attrs;
struct scatterlist;
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2. What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE 128
+
+
+/*
+ * log of the size of each IO TLB slab. The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
extern void
swiotlb_init(void);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -23,6 +23,7 @@
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
#include <linux/string.h>
+#include <linux/swiotlb.h>
#include <linux/types.h>
#include <linux/ctype.h>
@@ -40,19 +41,6 @@
#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2. What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE 128
-
-/*
- * log of the size of each IO TLB slab. The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
Signed-off-by: Ian Campbell <[email protected]>
---
lib/swiotlb.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -465,13 +465,9 @@
dma_addr_t dev_addr;
void *ret;
int order = get_order(size);
- u64 dma_mask = DMA_32BIT_MASK;
-
- if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+ if (ret && !address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
/*
* The allocated memory isn't reachable by the device.
* Fall back on swiotlb_map_single().
@@ -495,9 +491,9 @@
dev_addr = virt_to_bus(ret);
/* Confirm address can be DMA'd by device */
- if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
+ if (!address_needs_mapping(hwdev, dev_addr, size)) {
printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)dma_mask,
+ (unsigned long long)dma_get_mask(hwdev),
(unsigned long long)dev_addr);
/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
From: Ian Campbell <[email protected]>
Architectures may need to override these conversions. Implement a
__weak hook point containing the default implementation.
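As a rough sketch (not from this series), an architecture whose devices see
memory at a constant offset from CPU physical addresses could override the
hooks along these lines; the offset value is purely an assumption made up
for the example:
#define EXAMPLE_BUS_OFFSET	0x80000000UL	/* hypothetical */
dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
{
	return paddr + EXAMPLE_BUS_OFFSET;
}
phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
{
	return baddr - EXAMPLE_BUS_OFFSET;
}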
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
include/linux/swiotlb.h | 3 ++
lib/swiotlb.c | 52 ++++++++++++++++++++++++++++++++---------------
2 files changed, 39 insertions(+), 16 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -27,6 +27,9 @@
extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
+extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+
extern void
*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -125,6 +125,26 @@
return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}
+dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
+{
+ return paddr;
+}
+
+phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+ return baddr;
+}
+
+static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
+{
+ return swiotlb_phys_to_bus(virt_to_phys(address));
+}
+
+static void *swiotlb_bus_to_virt(dma_addr_t address)
+{
+ return phys_to_virt(swiotlb_bus_to_phys(address));
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -168,7 +188,7 @@
panic("Cannot allocate SWIOTLB overflow buffer!\n");
printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
- virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+ swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
}
void __init
@@ -250,7 +270,7 @@
printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
"0x%lx\n", bytes >> 20,
- virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+ swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
return 0;
@@ -298,7 +318,7 @@
unsigned long max_slots;
mask = dma_get_seg_boundary(hwdev);
- start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+ start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -471,7 +491,7 @@
int order = get_order(size);
ret = (void *)__get_free_pages(flags, order);
- if (ret && !address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
+ if (ret && !address_needs_mapping(hwdev, swiotlb_virt_to_bus(ret), size)) {
/*
* The allocated memory isn't reachable by the device.
* Fall back on swiotlb_map_single().
@@ -492,7 +512,7 @@
}
memset(ret, 0, size);
- dev_addr = virt_to_bus(ret);
+ dev_addr = swiotlb_virt_to_bus(ret);
/* Confirm address can be DMA'd by device */
if (!address_needs_mapping(hwdev, dev_addr, size)) {
@@ -552,7 +572,7 @@
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
int dir, struct dma_attrs *attrs)
{
- dma_addr_t dev_addr = virt_to_bus(ptr);
+ dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
void *map;
BUG_ON(dir == DMA_NONE);
@@ -573,7 +593,7 @@
map = io_tlb_overflow_buffer;
}
- dev_addr = virt_to_bus(map);
+ dev_addr = swiotlb_virt_to_bus(map);
/*
* Ensure that the address returned is DMA'ble
@@ -603,7 +623,7 @@
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, struct dma_attrs *attrs)
{
- char *dma_addr = bus_to_virt(dev_addr);
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -633,7 +653,7 @@
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, int target)
{
- char *dma_addr = bus_to_virt(dev_addr);
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -664,7 +684,7 @@
unsigned long offset, size_t size,
int dir, int target)
{
- char *dma_addr = bus_to_virt(dev_addr) + offset;
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -720,7 +740,7 @@
for_each_sg(sgl, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
- dev_addr = virt_to_bus(addr);
+ dev_addr = swiotlb_virt_to_bus(addr);
if (swiotlb_force ||
address_needs_mapping(hwdev, dev_addr, sg->length)) {
void *map = map_single(hwdev, addr, sg->length, dir);
@@ -733,7 +753,7 @@
sgl[0].dma_length = 0;
return 0;
}
- sg->dma_address = virt_to_bus(map);
+ sg->dma_address = swiotlb_virt_to_bus(map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
@@ -764,7 +784,7 @@
for_each_sg(sgl, sg, nelems, i) {
if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
- unmap_single(hwdev, bus_to_virt(sg->dma_address),
+ unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -797,7 +817,7 @@
for_each_sg(sgl, sg, nelems, i) {
if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
- sync_single(hwdev, bus_to_virt(sg->dma_address),
+ sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -821,7 +841,7 @@
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+ return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
}
/*
@@ -833,7 +853,7 @@
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return virt_to_bus(io_tlb_end - 1) <= mask;
+ return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_map_single);
From: Ian Campbell <[email protected]>
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
lib/swiotlb.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -301,6 +301,10 @@
start_dma_addr = virt_to_bus(io_tlb_start) & mask;
offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+
+ /*
+ * Carefully handle integer overflow which can occur when mask == ~0UL.
+ */
max_slots = mask + 1
? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
: 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
From: Ian Campbell <[email protected]>
Some architectures require special rules to determine whether a range
needs mapping or not. This adds a weak function for architectures to
override.
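The hook returns non-zero when a range must be bounced regardless of the
device mask. As an illustration only, a made-up policy that bounces any
buffer crossing a page boundary would look like this:
int swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
{
	unsigned long start = (unsigned long)ptr;
	/* hypothetical policy: bounce buffers that span a page boundary */
	return (start >> PAGE_SHIFT) != ((start + size - 1) >> PAGE_SHIFT);
}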
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
include/linux/swiotlb.h | 2 ++
lib/swiotlb.c | 15 +++++++++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -30,6 +30,8 @@
extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
+
extern void
*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -145,6 +145,11 @@
return phys_to_virt(swiotlb_bus_to_phys(address));
}
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+ return 0;
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -297,6 +302,11 @@
return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
}
+static inline int range_needs_mapping(void *ptr, size_t size)
+{
+ return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size);
+}
+
static int is_swiotlb_buffer(char *addr)
{
return addr >= io_tlb_start && addr < io_tlb_end;
@@ -581,7 +591,8 @@
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
+ if (!address_needs_mapping(hwdev, dev_addr, size) &&
+ !range_needs_mapping(ptr, size))
return dev_addr;
/*
@@ -741,7 +752,7 @@
for_each_sg(sgl, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
dev_addr = swiotlb_virt_to_bus(addr);
- if (swiotlb_force ||
+ if (range_needs_mapping(sg_virt(sg), sg->length) ||
address_needs_mapping(hwdev, dev_addr, sg->length)) {
void *map = map_single(hwdev, addr, sg->length, dir);
if (!map) {
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
lib/swiotlb.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -312,6 +312,15 @@
return addr >= io_tlb_start && addr < io_tlb_end;
}
+static void
+__sync_single(char *buffer, char *dma_addr, size_t size, int dir)
+{
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, buffer, size);
+ else
+ memcpy(buffer, dma_addr, size);
+}
+
/*
* Allocates bounce buffer and returns its kernel virtual address.
*/
@@ -413,7 +422,7 @@
for (i = 0; i < nslots; i++)
io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- memcpy(dma_addr, buffer, size);
+ __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
return dma_addr;
}
@@ -437,7 +446,7 @@
* bounce... copy the data back into the original buffer * and
* delete the bounce buffer.
*/
- memcpy(buffer, dma_addr, size);
+ __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
@@ -477,13 +486,13 @@
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(buffer, dma_addr, size);
+ __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(dma_addr, buffer, size);
+ __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
From: Ian Campbell <[email protected]>
Remove duplicated swiotlb info printing, and make it more detailed.
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
lib/swiotlb.c | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -156,6 +156,32 @@
return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset);
}
+static void swiotlb_print_info(unsigned long bytes)
+{
+ phys_addr_t pstart, pend;
+ dma_addr_t bstart, bend;
+
+ pstart = virt_to_phys(io_tlb_start);
+ pend = virt_to_phys(io_tlb_end);
+
+ bstart = swiotlb_phys_to_bus(pstart);
+ bend = swiotlb_phys_to_bus(pend);
+
+ printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
+ bytes >> 20, io_tlb_start, io_tlb_end);
+ if (pstart != bstart || pend != bend)
+ printk(KERN_INFO "software IO TLB at phys %#llx - %#llx"
+ " bus %#llx - %#llx\n",
+ (unsigned long long)pstart,
+ (unsigned long long)pend,
+ (unsigned long long)bstart,
+ (unsigned long long)bend);
+ else
+ printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
+ (unsigned long long)pstart,
+ (unsigned long long)pend);
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -198,8 +224,7 @@
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
- printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
- swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
+ swiotlb_print_info(bytes);
}
void __init
@@ -279,9 +304,7 @@
if (!io_tlb_overflow_buffer)
goto cleanup4;
- printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
- "0x%lx\n", bytes >> 20,
- swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
+ swiotlb_print_info(bytes);
return 0;
Add x86-specific swiotlb allocation functions. These are simply the
defaults for the moment.
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/pci-swiotlb_64.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
#include <linux/pci.h>
#include <linux/cache.h>
#include <linux/module.h>
+#include <linux/swiotlb.h>
+#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
#include <asm/iommu.h>
@@ -11,6 +13,16 @@
int swiotlb __read_mostly;
+void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+ return alloc_bootmem_low_pages(size);
+}
+
+void *swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+ return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
static dma_addr_t
swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
int direction)
swiotlb on 32-bit x86 will be used by the Xen domain 0 support.
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/include/asm/dma-mapping.h | 2 +-
arch/x86/include/asm/pci.h | 2 ++
arch/x86/include/asm/pci_64.h | 1 -
arch/x86/kernel/Makefile | 3 ++-
arch/x86/kernel/pci-dma.c | 6 ++++--
arch/x86/kernel/pci-swiotlb_64.c | 2 ++
arch/x86/mm/init_32.c | 3 +++
7 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -66,7 +66,7 @@
return dma_ops;
else
return dev->archdata.dma_ops;
-#endif /* _ASM_X86_DMA_MAPPING_H */
+#endif
}
/* Make sure we keep the same behaviour */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -84,6 +84,8 @@
static inline void early_quirks(void) { }
#endif
+extern void pci_iommu_alloc(void);
+
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -23,7 +23,6 @@
int reg, int len, u32 value);
extern void dma32_reserve_bootmem(void);
-extern void pci_iommu_alloc(void);
/* The PCI address space does equal the physical memory
* address space. The networking and block device layers use
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -110,6 +110,8 @@
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
+obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
@@ -123,7 +125,6 @@
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
- obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -101,11 +101,15 @@
dma32_bootmem_ptr = NULL;
dma32_bootmem_size = 0;
}
+#endif
void __init pci_iommu_alloc(void)
{
+#ifdef CONFIG_X86_64
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
+#endif
+
/*
* The order of these functions is important for
* fall-back/fail-over reasons
@@ -121,8 +125,6 @@
pci_swiotlb_init();
}
-#endif
-
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -62,8 +62,10 @@
void __init pci_swiotlb_init(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
+#ifdef CONFIG_X86_64
if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
swiotlb = 1;
+#endif
if (swiotlb_force)
swiotlb = 1;
if (swiotlb) {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
@@ -971,6 +972,8 @@
int codesize, reservedpages, datasize, initsize;
int tmp;
+ pci_iommu_alloc();
+
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
From: Ian Campbell <[email protected]>
Xen will override these later on.
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/pci-swiotlb_64.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -23,6 +23,16 @@
return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}
+dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
+{
+ return paddr;
+}
+
+phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+ return baddr;
+}
+
static dma_addr_t
swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
int direction)
This requires us to treat DMA regions in terms of page+offset rather
than virtual addressing since a HighMem page may not have a mapping.
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
lib/swiotlb.c | 120 +++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 88 insertions(+), 32 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -26,6 +26,7 @@
#include <linux/swiotlb.h>
#include <linux/types.h>
#include <linux/ctype.h>
+#include <linux/highmem.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -38,9 +39,6 @@
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
@@ -91,7 +89,10 @@
* We need to save away the original address corresponding to a mapped entry
* for the sync operations.
*/
-static unsigned char **io_tlb_orig_addr;
+static struct swiotlb_phys_addr {
+ struct page *page;
+ unsigned int offset;
+} *io_tlb_orig_addr;
/*
* Protect the above data structures in the map and unmap calls
@@ -150,6 +151,11 @@
return 0;
}
+static dma_addr_t swiotlb_sg_to_bus(struct scatterlist *sg)
+{
+ return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset);
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -183,7 +189,7 @@
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
/*
* Get the overflow emergency buffer
@@ -258,12 +264,12 @@
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(char *)));
+ io_tlb_orig_addr = (struct swiotlb_phys_addr *)__get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr)));
if (!io_tlb_orig_addr)
goto cleanup3;
- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
/*
* Get the overflow emergency buffer
@@ -312,20 +318,59 @@
return addr >= io_tlb_start && addr < io_tlb_end;
}
+static struct swiotlb_phys_addr swiotlb_bus_to_phys_addr(char *dma_addr)
+{
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ struct swiotlb_phys_addr buffer = io_tlb_orig_addr[index];
+ buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
+ buffer.page += buffer.offset >> PAGE_SHIFT;
+ buffer.offset &= PAGE_SIZE - 1;
+ return buffer;
+}
+
static void
-__sync_single(char *buffer, char *dma_addr, size_t size, int dir)
+__sync_single(struct swiotlb_phys_addr buffer, char *dma_addr, size_t size, int dir)
{
- if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, buffer, size);
- else
- memcpy(buffer, dma_addr, size);
+ if (PageHighMem(buffer.page)) {
+ size_t len, bytes;
+ char *dev, *host, *kmp;
+
+ len = size;
+ while (len != 0) {
+ unsigned long flags;
+
+ bytes = len;
+ if ((bytes + buffer.offset) > PAGE_SIZE)
+ bytes = PAGE_SIZE - buffer.offset;
+ local_irq_save(flags); /* protects KM_BOUNCE_READ */
+ kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
+ dev = dma_addr + size - len;
+ host = kmp + buffer.offset;
+ if (dir == DMA_FROM_DEVICE)
+ memcpy(host, dev, bytes);
+ else
+ memcpy(dev, host, bytes);
+ kunmap_atomic(kmp, KM_BOUNCE_READ);
+ local_irq_restore(flags);
+ len -= bytes;
+ buffer.page++;
+ buffer.offset = 0;
+ }
+ } else {
+ void *v = page_address(buffer.page) + buffer.offset;
+
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, v, size);
+ else
+ memcpy(v, dma_addr, size);
+ }
}
/*
* Allocates bounce buffer and returns its kernel virtual address.
*/
static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+map_single(struct device *hwdev, struct swiotlb_phys_addr buffer, size_t size, int dir)
{
unsigned long flags;
char *dma_addr;
@@ -335,6 +380,7 @@
unsigned long mask;
unsigned long offset_slots;
unsigned long max_slots;
+ struct swiotlb_phys_addr slot_buf;
mask = dma_get_seg_boundary(hwdev);
start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
@@ -419,8 +465,13 @@
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
+ slot_buf = buffer;
+ for (i = 0; i < nslots; i++) {
+ slot_buf.page += slot_buf.offset >> PAGE_SHIFT;
+ slot_buf.offset &= PAGE_SIZE - 1;
+ io_tlb_orig_addr[index+i] = slot_buf;
+ slot_buf.offset += 1 << IO_TLB_SHIFT;
+ }
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
__sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
@@ -436,12 +487,12 @@
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
+ struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
/*
* First, sync the memory before unmapping the entry
*/
- if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
/*
* bounce... copy the data back into the original buffer * and
* delete the bounce buffer.
@@ -478,10 +529,7 @@
sync_single(struct device *hwdev, char *dma_addr, size_t size,
int dir, int target)
{
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
-
- buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+ struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
switch (target) {
case SYNC_FOR_CPU:
@@ -525,7 +573,10 @@
* swiotlb_map_single(), which will grab memory from
* the lowest available address range.
*/
- ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+ struct swiotlb_phys_addr buffer;
+ buffer.page = virt_to_page(NULL);
+ buffer.offset = 0;
+ ret = map_single(hwdev, buffer, size, DMA_FROM_DEVICE);
if (!ret)
return NULL;
}
@@ -593,6 +644,7 @@
{
dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
void *map;
+ struct swiotlb_phys_addr buffer;
BUG_ON(dir == DMA_NONE);
/*
@@ -607,7 +659,9 @@
/*
* Oh well, have to allocate and map a bounce buffer.
*/
- map = map_single(hwdev, ptr, size, dir);
+ buffer.page = virt_to_page(ptr);
+ buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
+ map = map_single(hwdev, buffer, size, dir);
if (!map) {
swiotlb_full(hwdev, size, dir, 1);
map = io_tlb_overflow_buffer;
@@ -752,18 +806,20 @@
int dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
- void *addr;
+ struct swiotlb_phys_addr buffer;
dma_addr_t dev_addr;
int i;
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- dev_addr = swiotlb_virt_to_bus(addr);
+ dev_addr = swiotlb_sg_to_bus(sg);
if (range_needs_mapping(sg_virt(sg), sg->length) ||
address_needs_mapping(hwdev, dev_addr, sg->length)) {
- void *map = map_single(hwdev, addr, sg->length, dir);
+ void *map;
+ buffer.page = sg_page(sg);
+ buffer.offset = sg->offset;
+ map = map_single(hwdev, buffer, sg->length, dir);
if (!map) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
@@ -803,11 +859,11 @@
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != swiotlb_sg_to_bus(sg))
unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
}
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -836,11 +892,11 @@
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != swiotlb_sg_to_bus(sg))
sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
}
}
From: Ian Campbell <[email protected]>
Xen will override these later on.
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/pci-swiotlb_64.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -33,6 +33,11 @@
return baddr;
}
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+ return 0;
+}
+
static dma_addr_t
swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
int direction)
* Jeremy Fitzhardinge <[email protected]> wrote:
> Hi Ingo,
>
> Here are some patches to clean up swiotlb to prepare for some Xen dom0
> patches. These have been posted before, but have undergone a round of
> cleanups to address various comments.
applied to tip/core/iommu, thanks Jeremy.
the only patch that seems to have the potential to break drivers is:
be4ac7b: swiotlb: consistently use address_needs_mapping everywhere
looks fine to me, but the gents on the Cc: might have a dissenting
opinion?
Ingo
------------->
From be4ac7b87b27380bc43fa4f686e39b46eca2c866 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <[email protected]>
Date: Tue, 16 Dec 2008 12:17:28 -0800
Subject: [PATCH] swiotlb: consistently use address_needs_mapping everywhere
Impact: remove open-coded DMA mask assumptions
Signed-off-by: Ian Campbell <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
lib/swiotlb.c | 10 +++-------
1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index db724ba..76821f0 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -465,13 +465,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t dev_addr;
void *ret;
int order = get_order(size);
- u64 dma_mask = DMA_32BIT_MASK;
-
- if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+ if (ret && !address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
/*
* The allocated memory isn't reachable by the device.
* Fall back on swiotlb_map_single().
@@ -495,9 +491,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dev_addr = virt_to_bus(ret);
/* Confirm address can be DMA'd by device */
- if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
+ if (!address_needs_mapping(hwdev, dev_addr, size)) {
printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)dma_mask,
+ (unsigned long long)dma_get_mask(hwdev),
(unsigned long long)dev_addr);
/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
On Thu, 18 Dec 2008 12:17:54 -0600
Becky Bruce <[email protected]> wrote:
> > Can you be more specific? What architecture is plan to use highmem
> > support in swiotlb?
>
> 32-bit powerpc needs this support - I was actually about to push a
> similar set of patches. We have several processors that support 36
> bits of physical address space and do not have any iommu capability.
> The rest of the kernel support for those processors is now in place,
> so swiotlb is the last piece of the puzzle for that to be fully
> functional. I need to take a closer look at this series to see
> exactly what it's doing and how it differs from what I've been testing.
Ah, thanks,
Then I accept that highmem support in lib/swiotlb.c is generically
useful and we have to live with the ugly complication due to it.
On Dec 18, 2008, at 3:02 PM, Ingo Molnar wrote:
>
> * Becky Bruce <[email protected]> wrote:
>
>>> Can you be more specific? What architecture is plan to use highmem
>>> support in swiotlb?
>>
>> 32-bit powerpc needs this support - I was actually about to push a
>> similar set of patches. We have several processors that support 36
>> bits
>> of physical address space and do not have any iommu capability. The
>> rest of the kernel support for those processors is now in place, so
>> swiotlb is the last piece of the puzzle for that to be fully
>> functional.
>> I need to take a closer look at this series to see exactly what it's
>> doing and how it differs from what I've been testing.
>>
>> So there is another immediate use case, and I'd really hate to see
>> this
>> code duplicated. It should be entirely possible to remove the
>> assumption that we can save off the VA of the original buffer,
>> which is
>> the thing that precludes HIGHMEM support, and still have nice
>> readable,
>> maintainable code.
>
> you can have a look at Jeremy's current lineup of patches in
> tip/core/iotlb:
>
> http://people.redhat.com/mingo/tip.git/README
>
> (also integrated into tip/master)
>
> What would be needed to make it suit your usecase too?
Ingo,
I've taken a quick look at the series posted to the list, and they're
actually very similar to what I've done. I think there are really
only 3 fairly minor issues that need to be resolved to make this work
on ppc:
1) We need to add swiotlb_map/unmap_page calls, as those are part of the
ppc dma_mapping_ops (in fact, we don't have map/unmap_single in the
dma_mapping_ops struct on ppc anymore - those just call map/
unmap_page). This is really just a matter of moving some code around
and making some minor changes, as swiotlb_map/unmap_single can call
swiotlb_map/unmap_page once the args have been converted. There's a
patch in my series that should make this pretty obvious.
2) To convert any address to/from a bus address, we also need the
hwdev pointer passed as an argument since ppc supports a per-device
offset accessed via the device ptr that is used to calculate the bus
address. I'd also given my conversion functions more generic names,
as it seemed highly likely that these would eventually be useful
outside of the swiotlb code.
3) powerpc uses enum dma_data_direction for the direction argument to
its dma_ops, which is, from reading the kernel docs, the correct
argument type for the DMA API functions. However, the iotlb/x86/ia64
code is currently using an int. This causes a build warning when we
initialize the dma_ops struct using the swiotlb funcs. Is there a
reason for the use of "int" in x86/ia64? The Right Thing(TM) here
seems to be to convert those over to using the enum, and I have a big
patch that starts doing that, but I've probably missed some places. I
could instead do some hackery on the ppc side, and leave the other
platforms alone, but I'd prefer to do it cleanly. Thoughts?
Unfortunately, this is horrible timing for me, as starting tomorrow,
I'm going to be offline for a week and a half or so in podunk
Louisiana with essentially no net access. I can integrate my code
into your tree and test on PPC as soon as I return to the real world.
I'll go ahead and post my current patch series for reference, since
I'm about to disappear - it's not been tested on anything other than
powerpc (which is all I have for testing - it's highly likely I've
done something that's bogus on x86), it needs some cleanup, and I've
only built the entire series on x86_64 (not the individual pieces),
but it should definitely give you an idea of what we need to make
things work on ppc - the code is stable running LTP on a 2-core 32-bit
MPC8641 board with 6GB of RAM.
Cheers,
Becky
When we enable swiotlb for platforms that support HIGHMEM, we
can no longer store the virtual address of the original dma
buffer, because that buffer might not have a permanent mapping.
Change the iotlb code to instead store the physical address of
the original buffer.
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 47 ++++++++++++++++++++++++-----------------------
1 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ed4f44a..e9d5bf6 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -118,7 +118,7 @@ static unsigned int io_tlb_index;
* We need to save away the original address corresponding to a mapped entry
* for the sync operations.
*/
-static unsigned char **io_tlb_orig_addr;
+static phys_addr_t *io_tlb_orig_addr;
/*
* Protect the above data structures in the map and unmap calls
@@ -175,7 +175,7 @@ swiotlb_init_with_default_size(size_t default_size)
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
/*
* Get the overflow emergency buffer
@@ -253,12 +253,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(char *)));
+ io_tlb_orig_addr = (phys_addr_t *)
+ __get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs *
+ sizeof(phys_addr_t)));
if (!io_tlb_orig_addr)
goto cleanup3;
- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
/*
* Get the overflow emergency buffer
@@ -276,8 +278,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
return 0;
cleanup4:
- free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
- sizeof(char *)));
+ free_pages((unsigned long)io_tlb_orig_addr,
+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
io_tlb_orig_addr = NULL;
cleanup3:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -307,7 +309,7 @@ static int is_swiotlb_buffer(char *addr)
* Allocates bounce buffer and returns its kernel virtual address.
*/
static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
{
unsigned long flags;
char *dma_addr;
@@ -398,9 +400,9 @@ found:
* needed.
*/
for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- memcpy(dma_addr, buffer, size);
+ memcpy(dma_addr, phys_to_virt(phys), size);
return dma_addr;
}
@@ -414,17 +416,17 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
+ phys_addr_t phys = io_tlb_orig_addr[index];
/*
* First, sync the memory before unmapping the entry
*/
- if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
/*
* bounce... copy the data back into the original buffer * and
* delete the bounce buffer.
*/
- memcpy(buffer, dma_addr, size);
+ memcpy(phys_to_virt(phys), dma_addr, size);
/*
* Return the buffer to the free list by setting the corresponding
@@ -457,20 +459,20 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
int dir, int target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
+ phys_addr_t phys = io_tlb_orig_addr[index];
- buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(buffer, dma_addr, size);
+ memcpy(phys_to_virt(phys), dma_addr, size);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(dma_addr, buffer, size);
+ memcpy(dma_addr, phys_to_virt(phys), size);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
@@ -509,7 +511,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
* swiotlb_map_single(), which will grab memory from
* the lowest available address range.
*/
- ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
if (!ret)
return NULL;
}
@@ -591,7 +593,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
/*
* Oh well, have to allocate and map a bounce buffer.
*/
- map = map_single(hwdev, ptr, size, dir);
+ map = map_single(hwdev, virt_to_phys(ptr), size, dir);
if (!map) {
swiotlb_full(hwdev, size, dir, 1);
map = io_tlb_overflow_buffer;
@@ -736,18 +738,17 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
int dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
- void *addr;
dma_addr_t dev_addr;
int i;
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- addr = sg_virt(sg);
- dev_addr = virt_to_dma_addr(hwdev, addr);
+ dev_addr = SG_ENT_BUS_ADDRESS(hwdev, sg);
if (swiotlb_force ||
swiotlb_addr_needs_mapping(hwdev, dev_addr, sg->length)) {
- void *map = map_single(hwdev, addr, sg->length, dir);
+ void *map = map_single(hwdev, sg_phys(sg), sg->length,
+ dir);
if (!map) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
--
1.5.6.5
Also, change swiotlb_map/unmap_single to call the page
versions by converting the arguments into a page address and
offset.
Signed-off-by: Becky Bruce <[email protected]>
---
include/linux/swiotlb.h | 10 +++++++
lib/swiotlb.c | 63 ++++++++++++++++++++++++++++++++---------------
2 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f4d1be3..ecb5eac 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -79,6 +79,16 @@ swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
unsigned long offset, size_t size,
enum dma_data_direction dir);
+extern dma_addr_t
+swiotlb_map_page_attrs(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs);
+
+extern void
+swiotlb_unmap_page_attrs(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs);
+
extern int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fc906d5..166aa78 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -606,19 +606,17 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
}
}
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode. The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t
-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+dma_addr_t swiotlb_map_page_attrs(struct device *hwdev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
- dma_addr_t dev_addr = virt_to_dma_addr(hwdev, ptr);
+ dma_addr_t dev_addr;
void *map;
+ phys_addr_t phys;
+
+ phys = page_to_phys(page) + offset;
+ dev_addr = phys_to_dma_addr(hwdev, phys);
BUG_ON(dir == DMA_NONE);
/*
@@ -633,7 +631,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
/*
* Oh well, have to allocate and map a bounce buffer.
*/
- map = map_single(hwdev, virt_to_phys(ptr), size, dir);
+ map = map_single(hwdev, phys, size, dir);
if (!map) {
swiotlb_full(hwdev, size, dir, 1);
map = io_tlb_overflow_buffer;
@@ -646,9 +644,40 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
*/
if (swiotlb_addr_needs_mapping(hwdev, dev_addr, size))
panic("map_single: bounce buffer is not DMA'ble");
-
return dev_addr;
}
+EXPORT_SYMBOL(swiotlb_map_page_attrs);
+
+void swiotlb_unmap_page_attrs(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+ if (is_swiotlb_buffer(dma_addr))
+ unmap_single(hwdev, dma_addr, size, dir);
+ else if (dir == DMA_FROM_DEVICE)
+ dma_mark_clean(dma_addr, size);
+
+}
+EXPORT_SYMBOL(swiotlb_unmap_page_attrs);
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode. The
+ * physical address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ */
+dma_addr_t
+swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ return swiotlb_map_page_attrs(hwdev, virt_to_page(ptr),
+ (unsigned long)ptr % PAGE_SIZE,
+ size, dir, attrs);
+}
EXPORT_SYMBOL(swiotlb_map_single_attrs);
dma_addr_t
@@ -671,13 +700,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
-
- BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(dma_addr))
- unmap_single(hwdev, dma_addr, size, dir);
- else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(dma_addr, size);
+ return swiotlb_unmap_page_attrs(hwdev, dev_addr, size, dir, attrs);
}
EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
--
1.5.6.5
Use address conversion helpers instead of the virt to/from bus macros -
those have been deprecated on some architectures. Set up weak definitions
that default to the old behavior of calling virt_to_bus/bus_to_virt.
Add hwdev pointer as an argument - some architectures support
a per-device offset that is needed to get the bus address, and need
the hwdev pointer to get to that information.
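For illustration only, a platform with such a per-device offset might
implement the hooks roughly as below; the archdata.dma_offset field is an
assumption made up for the example and is not added by these patches:
dma_addr_t virt_to_dma_addr(struct device *hwdev, void *addr)
{
	return virt_to_bus(addr) + hwdev->archdata.dma_offset;
}
void *dma_addr_to_virt(struct device *hwdev, dma_addr_t addr)
{
	return bus_to_virt(addr - hwdev->archdata.dma_offset);
}
dma_addr_t phys_to_dma_addr(struct device *hwdev, phys_addr_t addr)
{
	return (dma_addr_t)addr + hwdev->archdata.dma_offset;
}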
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 51 ++++++++++++++++++++++++++++++++++++---------------
1 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ef09b4c..ed4f44a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -33,10 +33,27 @@
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>
+
+inline dma_addr_t __weak
+virt_to_dma_addr(struct device *hwdev, void *addr)
+{
+ return virt_to_bus(addr);
+}
+inline void *__weak
+dma_addr_to_virt(struct device *hwdev, dma_addr_t addr)
+{
+ return bus_to_virt(addr);
+}
+inline dma_addr_t __weak
+phys_to_dma_addr(struct device *hwdev, phys_addr_t addr)
+{
+ return (dma_addr_t)addr;
+}
+
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))
-#define SG_ENT_BUS_ADDRESS(hwdev, sg) virt_to_bus(sg_virt(sg))
+#define SG_ENT_BUS_ADDRESS(hwdev, sg) phys_to_dma_addr(hwdev, sg_phys(sg))
/*
* Maximum allowable number of contiguous slabs to map,
@@ -302,7 +319,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
unsigned long max_slots;
mask = dma_get_seg_boundary(hwdev);
- start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+ start_dma_addr = virt_to_dma_addr(hwdev, io_tlb_start) & mask;
offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
max_slots = mask + 1
@@ -475,7 +492,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+ if (ret && !is_buffer_dma_capable(dma_mask,
+ virt_to_dma_addr(hwdev, ret),
+ size)) {
/*
* The allocated memory isn't reachable by the device.
* Fall back on swiotlb_map_single().
@@ -496,7 +515,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
}
memset(ret, 0, size);
- dev_addr = virt_to_bus(ret);
+ dev_addr = virt_to_dma_addr(hwdev, ret);
/* Confirm address can be DMA'd by device */
if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
@@ -556,7 +575,7 @@ dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
int dir, struct dma_attrs *attrs)
{
- dma_addr_t dev_addr = virt_to_bus(ptr);
+ dma_addr_t dev_addr = virt_to_dma_addr(hwdev, ptr);
void *map;
BUG_ON(dir == DMA_NONE);
@@ -578,7 +597,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
map = io_tlb_overflow_buffer;
}
- dev_addr = virt_to_bus(map);
+ dev_addr = virt_to_dma_addr(hwdev, map);
/*
* Ensure that the address returned is DMA'ble
@@ -608,7 +627,7 @@ void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, struct dma_attrs *attrs)
{
- char *dma_addr = bus_to_virt(dev_addr);
+ char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -638,7 +657,7 @@ static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, int target)
{
- char *dma_addr = bus_to_virt(dev_addr);
+ char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -669,7 +688,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
unsigned long offset, size_t size,
int dir, int target)
{
- char *dma_addr = bus_to_virt(dev_addr) + offset;
+ char *dma_addr = dma_addr_to_virt(hwdev, dev_addr) + offset;
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dma_addr))
@@ -725,7 +744,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
for_each_sg(sgl, sg, nelems, i) {
addr = sg_virt(sg);
- dev_addr = virt_to_bus(addr);
+ dev_addr = virt_to_dma_addr(hwdev, addr);
if (swiotlb_force ||
swiotlb_addr_needs_mapping(hwdev, dev_addr, sg->length)) {
void *map = map_single(hwdev, addr, sg->length, dir);
@@ -738,7 +757,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sgl[0].dma_length = 0;
return 0;
}
- sg->dma_address = virt_to_bus(map);
+ sg->dma_address = virt_to_dma_addr(hwdev, map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
@@ -769,7 +788,8 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
for_each_sg(sgl, sg, nelems, i) {
if (sg->dma_address != SG_ENT_BUS_ADDRESS(hwdev, sg))
- unmap_single(hwdev, bus_to_virt(sg->dma_address),
+ unmap_single(hwdev,
+ dma_addr_to_virt(hwdev, sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
dma_mark_clean(sg_virt(sg), sg->dma_length);
@@ -802,7 +822,8 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
for_each_sg(sgl, sg, nelems, i) {
if (sg->dma_address != SG_ENT_BUS_ADDRESS(hwdev, sg))
- sync_single(hwdev, bus_to_virt(sg->dma_address),
+ sync_single(hwdev,
+ dma_addr_to_virt(hwdev, sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
dma_mark_clean(sg_virt(sg), sg->dma_length);
@@ -826,7 +847,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+ return dma_addr == virt_to_dma_addr(hwdev, io_tlb_overflow_buffer);
}
/*
@@ -838,7 +859,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return virt_to_bus(io_tlb_end - 1) <= mask;
+ return virt_to_dma_addr(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_map_single);
--
1.5.6.5
Ingo, Jeremy,
For reference, here's the set of patches I've been testing on powerpc
using swiotlb with HIGHMEM enabled. I think if we can deal with the
issues I mentioned in my previous mail, we should be able to use the
patch series Jeremy published on ppc - I'm just sticking this out here
since I'm about to fall off the internet for a while. I'll look at
getting your code working on ppc as soon as I'm back online.
Cheers,
Becky
All SG_ENT_VIRT_ADDRESS does is call sg_virt(); get rid of
this needless layer of complexity.
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 9 ++++-----
1 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 96e8b10..3d9ac10 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -36,8 +36,7 @@
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
+#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(sg_virt(sg))
/*
* Maximum allowable number of contiguous slabs to map,
@@ -721,7 +720,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = sg_virt(sg);
dev_addr = virt_to_bus(addr);
if (swiotlb_force ||
address_needs_mapping(hwdev, dev_addr, sg->length)) {
@@ -769,7 +768,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
unmap_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(sg_virt(sg), sg->dma_length);
}
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -802,7 +801,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
sync_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
- dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+ dma_mark_clean(sg_virt(sg), sg->dma_length);
}
}
--
1.5.6.5
Rename address_needs_mapping() to swiotlb_addr_needs_mapping() and make
it weak so that architectures can override it if needed.
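For illustration only (not part of this patch), an architecture could
provide a strong definition along these lines to replace the __weak
default; the extra bounce policy shown here is made up:

#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>

int swiotlb_addr_needs_mapping(struct device *hwdev, dma_addr_t addr,
			       size_t size)
{
	/* Keep the generic capability check... */
	if (!is_buffer_dma_capable(dma_get_mask(hwdev), addr, size))
		return 1;
	/* ...plus a hypothetical platform rule: bounce anything above 4GB. */
	return addr + size > 0x100000000ULL;
}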
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3d9ac10..9b89a9d 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -272,8 +272,8 @@ cleanup1:
return -ENOMEM;
}
-static int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
+int __weak
+swiotlb_addr_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
{
return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
}
@@ -562,7 +562,8 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
+ if (!swiotlb_addr_needs_mapping(hwdev, dev_addr, size) &&
+ !swiotlb_force)
return dev_addr;
/*
@@ -579,7 +580,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
/*
* Ensure that the address returned is DMA'ble
*/
- if (address_needs_mapping(hwdev, dev_addr, size))
+ if (swiotlb_addr_needs_mapping(hwdev, dev_addr, size))
panic("map_single: bounce buffer is not DMA'ble");
return dev_addr;
@@ -723,7 +724,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
addr = sg_virt(sg);
dev_addr = virt_to_bus(addr);
if (swiotlb_force ||
- address_needs_mapping(hwdev, dev_addr, sg->length)) {
+ swiotlb_addr_needs_mapping(hwdev, dev_addr, sg->length)) {
void *map = map_single(hwdev, addr, sg->length, dir);
if (!map) {
/* Don't panic here, we expect map_sg users
--
1.5.6.5
On highmem systems, the original DMA buffer might not have a kernel
virtual mapping - we need to kmap it to perform the bounce copy.
Extract the code that does the actual copy into a helper,
swiotlb_bounce(), which does the kmap when highmem is enabled and
falls back to the plain memcpy otherwise.
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 53 +++++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index e9d5bf6..ab5d3d7 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -14,6 +14,7 @@
* 04/07/.. ak Better overflow handling. Assorted fixes.
* 05/09/10 linville Add support for syncing ranges, support syncing for
* DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
+ * 08/12/11 beckyb Add highmem support
*/
#include <linux/cache.h>
@@ -24,6 +25,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>
+#include <linux/highmem.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -306,6 +308,45 @@ static int is_swiotlb_buffer(char *addr)
}
/*
+ * Bounce: copy the swiotlb buffer back to the original dma location
+ */
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
+{
+#ifdef CONFIG_HIGHMEM
+ /* The buffer may not have a mapping. Map it in and copy */
+ unsigned int offset = ((unsigned long)phys &
+ ((1 << PAGE_SHIFT) - 1));
+ char *buffer;
+ unsigned int sz = 0;
+ unsigned long flags;
+
+ while (size) {
+ sz = ((PAGE_SIZE - offset) > size) ? size :
+ PAGE_SIZE - offset;
+ local_irq_save(flags);
+ buffer = kmap_atomic(pfn_to_page(phys >> PAGE_SHIFT),
+ KM_BOUNCE_READ);
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, buffer + offset, sz);
+ else
+ memcpy(buffer + offset, dma_addr, sz);
+ kunmap_atomic(buffer, KM_BOUNCE_READ);
+ local_irq_restore(flags);
+ size -= sz;
+ phys += sz;
+ dma_addr += sz;
+ offset = 0;
+ }
+#else
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, phys_to_virt(phys), size);
+ else
+ memcpy(phys_to_virt(phys), dma_addr, size);
+#endif
+}
+
+/*
* Allocates bounce buffer and returns its kernel virtual address.
*/
static void *
@@ -402,7 +443,7 @@ found:
for (i = 0; i < nslots; i++)
io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- memcpy(dma_addr, phys_to_virt(phys), size);
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
return dma_addr;
}
@@ -422,11 +463,7 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
* First, sync the memory before unmapping the entry
*/
if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- /*
- * bounce... copy the data back into the original buffer * and
- * delete the bounce buffer.
- */
- memcpy(phys_to_virt(phys), dma_addr, size);
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
@@ -466,13 +503,13 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(phys_to_virt(phys), dma_addr, size);
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- memcpy(dma_addr, phys_to_virt(phys), size);
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
--
1.5.6.5
Rename SG_ENT_PHYS_ADDRESS to SG_ENT_BUS_ADDRESS and add a hwdev
argument - some platforms will need it to calculate the actual bus
address.
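To illustrate the motivation (sketch only, not actual powerpc code -
get_dma_offset() is a hypothetical helper), on a platform with a
per-device bus offset the conversion cannot be done from the address
alone:

#include <linux/device.h>
#include <asm/io.h>

/* Hypothetical: look up a per-device bus offset (0 means no offset). */
static unsigned long get_dma_offset(struct device *hwdev)
{
	/* A real platform would fetch this from per-device data. */
	return 0;
}

static dma_addr_t sketch_virt_to_bus(struct device *hwdev, void *addr)
{
	return virt_to_phys(addr) + get_dma_offset(hwdev);
}

static void *sketch_bus_to_virt(struct device *hwdev, dma_addr_t bus_addr)
{
	return phys_to_virt(bus_addr - get_dma_offset(hwdev));
}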
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 9b89a9d..add1f92 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -36,7 +36,7 @@
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))
-#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(sg_virt(sg))
+#define SG_ENT_BUS_ADDRESS(hwdev, sg) virt_to_bus(sg_virt(sg))
/*
* Maximum allowable number of contiguous slabs to map,
@@ -765,7 +765,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != SG_ENT_BUS_ADDRESS(hwdev, sg))
unmap_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else if (dir == DMA_FROM_DEVICE)
@@ -798,7 +798,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+ if (sg->dma_address != SG_ENT_BUS_ADDRESS(hwdev, sg))
sync_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
else if (dir == DMA_FROM_DEVICE)
--
1.5.6.5
Currently the bus address is printed, but the physical address makes
more sense in the context of the overall IO TLB.  Also, on some
platforms a hwdev pointer is needed to determine a bus address, and we
don't have one at the time of these printks.
Signed-off-by: Becky Bruce <[email protected]>
---
lib/swiotlb.c | 13 ++++++++-----
1 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index add1f92..ef09b4c 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -167,8 +167,10 @@ swiotlb_init_with_default_size(size_t default_size)
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
- printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
- virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+ printk(KERN_INFO "Placing software IO TLB between cpu physical "
+ "0x%llx - 0x%llx\n",
+ (unsigned long long)virt_to_phys(io_tlb_start),
+ (unsigned long long)virt_to_phys(io_tlb_end));
}
void __init
@@ -249,9 +251,10 @@ swiotlb_late_init_with_default_size(size_t default_size)
if (!io_tlb_overflow_buffer)
goto cleanup4;
- printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
- "0x%lx\n", bytes >> 20,
- virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+ printk(KERN_INFO "Placing %luMB software IO TLB between cpu physical "
+ "0x%llx - 0x%llx\n", bytes >> 20,
+ (unsigned long long)virt_to_phys(io_tlb_start),
+ (unsigned long long)virt_to_phys(io_tlb_end));
return 0;
--
1.5.6.5
SWIOTLB currently uses "int" as the type of the DMA direction, not
"enum dma_data_direction" as documented in DMA-API.txt.  Now that this
code is about to become shared with powerpc, which uses the enum,
update the swiotlb code to use the enum as well.  Doing this requires
changing all the architectures that use swiotlb so that they pass the
enum too.
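For reference, the enum in question (as defined in
include/linux/dma-mapping.h at the time) is:

enum dma_data_direction {
	DMA_BIDIRECTIONAL = 0,
	DMA_TO_DEVICE = 1,
	DMA_FROM_DEVICE = 2,
	DMA_NONE = 3,
};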
Signed-off-by: Becky Bruce <[email protected]>
---
arch/ia64/dig/dig_vtd_iommu.c | 9 +++--
arch/ia64/hp/common/hwsw_iommu.c | 22 ++++++++------
arch/ia64/hp/common/sba_iommu.c | 12 ++++---
arch/ia64/include/asm/dma-mapping.h | 29 +++++++++---------
arch/ia64/include/asm/swiotlb.h | 26 ++++++++++------
arch/ia64/kernel/machvec.c | 6 ++-
arch/ia64/sn/kernel/io_common.c | 3 +-
arch/ia64/sn/pci/pci_dma.c | 21 ++++++++-----
arch/ia64/sn/pci/pcibr/pcibr_dma.c | 3 +-
arch/ia64/sn/pci/tioca_provider.c | 3 +-
arch/x86/include/asm/dma-mapping.h | 50 ++++++++++++++++++--------------
arch/x86/include/asm/swiotlb.h | 24 +++++++++------
arch/x86/include/asm/tce.h | 3 +-
arch/x86/kernel/amd_iommu.c | 16 +++++-----
arch/x86/kernel/pci-calgary_64.c | 11 ++++---
arch/x86/kernel/pci-gart_64.c | 19 ++++++++----
arch/x86/kernel/pci-nommu.c | 4 +-
arch/x86/kernel/pci-swiotlb_64.c | 2 +-
arch/x86/kernel/tce_64.c | 3 +-
drivers/pci/intel-iommu.c | 13 ++++----
include/linux/intel-iommu.h | 12 +++++--
include/linux/swiotlb.h | 33 ++++++++++++---------
lib/swiotlb.c | 54 ++++++++++++++++++++---------------
23 files changed, 219 insertions(+), 159 deletions(-)
diff --git a/arch/ia64/dig/dig_vtd_iommu.c b/arch/ia64/dig/dig_vtd_iommu.c
index 1c8a079..f22daf6 100644
--- a/arch/ia64/dig/dig_vtd_iommu.c
+++ b/arch/ia64/dig/dig_vtd_iommu.c
@@ -21,7 +21,7 @@ EXPORT_SYMBOL_GPL(vtd_free_coherent);
dma_addr_t
vtd_map_single_attrs(struct device *dev, void *addr, size_t size,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
return intel_map_single(dev, (phys_addr_t)addr, size, dir);
}
@@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(vtd_map_single_attrs);
void
vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
intel_unmap_single(dev, iova, size, dir);
}
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs);
int
vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
return intel_map_sg(dev, sglist, nents, dir);
}
@@ -45,7 +45,8 @@ EXPORT_SYMBOL_GPL(vtd_map_sg_attrs);
void
vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
- int nents, int dir, struct dma_attrs *attrs)
+ int nents, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
intel_unmap_sg(dev, sglist, nents, dir);
}
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 2769dbf..fd5af38 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -91,8 +91,8 @@ hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma
}
dma_addr_t
-hwsw_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
- struct dma_attrs *attrs)
+hwsw_map_single_attrs(struct device *dev, void *addr, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
return swiotlb_map_single_attrs(dev, addr, size, dir, attrs);
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(hwsw_map_single_attrs);
void
hwsw_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
return swiotlb_unmap_single_attrs(dev, iova, size, dir, attrs);
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(hwsw_unmap_single_attrs);
int
hwsw_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
return swiotlb_map_sg_attrs(dev, sglist, nents, dir, attrs);
@@ -125,7 +125,7 @@ EXPORT_SYMBOL(hwsw_map_sg_attrs);
void
hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
return swiotlb_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
@@ -135,7 +135,8 @@ hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
EXPORT_SYMBOL(hwsw_unmap_sg_attrs);
void
-hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
+hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
{
if (use_swiotlb(dev))
swiotlb_sync_single_for_cpu(dev, addr, size, dir);
@@ -144,7 +145,8 @@ hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int
}
void
-hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir)
+hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems,
+ enum dma_data_direction dir)
{
if (use_swiotlb(dev))
swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir);
@@ -153,7 +155,8 @@ hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, in
}
void
-hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir)
+hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
{
if (use_swiotlb(dev))
swiotlb_sync_single_for_device(dev, addr, size, dir);
@@ -162,7 +165,8 @@ hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, i
}
void
-hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir)
+hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems,
+ enum dma_data_direction dir)
{
if (use_swiotlb(dev))
swiotlb_sync_sg_for_device(dev, sg, nelems, dir);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index d98f0f4..84fe18f 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -909,8 +909,8 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
* See Documentation/DMA-mapping.txt
*/
dma_addr_t
-sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
- struct dma_attrs *attrs)
+sba_map_single_attrs(struct device *dev, void *addr, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct ioc *ioc;
dma_addr_t iovp;
@@ -1027,7 +1027,8 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
* See Documentation/DMA-mapping.txt
*/
void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
@@ -1423,7 +1424,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
* See Documentation/DMA-mapping.txt
*/
int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct ioc *ioc;
int coalesced, filled = 0;
@@ -1515,7 +1516,8 @@ EXPORT_SYMBOL(sba_map_sg_attrs);
* See Documentation/DMA-mapping.txt
*/
void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
- int nents, int dir, struct dma_attrs *attrs)
+ int nents, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
#ifdef ASSERT_PDIR_SANITY
struct ioc *ioc;
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index bbab7e2..7452354 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -17,32 +17,32 @@ struct dma_mapping_ops {
void (*free_coherent)(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr,
- size_t size, int direction);
+ size_t size, enum dma_data_direction direction);
void (*unmap_single)(struct device *dev, dma_addr_t addr,
- size_t size, int direction);
+ size_t size, enum dma_data_direction direction);
void (*sync_single_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
- int direction);
+ enum dma_data_direction direction);
void (*sync_single_for_device)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
- int direction);
+ enum dma_data_direction direction);
void (*sync_single_range_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
- size_t size, int direction);
+ size_t size, enum dma_data_direction direction);
void (*sync_single_range_for_device)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
- size_t size, int direction);
+ size_t size, enum dma_data_direction direction);
void (*sync_sg_for_cpu)(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int direction);
+ enum dma_data_direction direction);
void (*sync_sg_for_device)(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int direction);
+ enum dma_data_direction direction);
int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
void (*unmap_sg)(struct device *hwdev,
struct scatterlist *sg, int nents,
- int direction);
+ enum dma_data_direction direction);
int (*dma_supported_op)(struct device *hwdev, u64 mask);
int is_phys;
};
@@ -70,25 +70,26 @@ dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
}
#define dma_map_single_attrs platform_dma_map_single_attrs
static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
- size_t size, int dir)
+ size_t size,
+ enum dma_data_direction dir)
{
return dma_map_single_attrs(dev, cpu_addr, size, dir, NULL);
}
#define dma_map_sg_attrs platform_dma_map_sg_attrs
static inline int dma_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, int dir)
+ int nents, enum dma_data_direction dir)
{
return dma_map_sg_attrs(dev, sgl, nents, dir, NULL);
}
#define dma_unmap_single_attrs platform_dma_unmap_single_attrs
static inline void dma_unmap_single(struct device *dev, dma_addr_t cpu_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
return dma_unmap_single_attrs(dev, cpu_addr, size, dir, NULL);
}
#define dma_unmap_sg_attrs platform_dma_unmap_sg_attrs
static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nents, int dir)
+ int nents, enum dma_data_direction dir)
{
return dma_unmap_sg_attrs(dev, sgl, nents, dir, NULL);
}
diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
index fb79423..81c5629 100644
--- a/arch/ia64/include/asm/swiotlb.h
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -6,35 +6,41 @@
/* SWIOTLB interface */
extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_for_device(struct device *hwdev,
dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int dir);
+ enum dma_data_direction dir);
extern void swiotlb_sync_sg_for_device(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int dir);
+ enum dma_data_direction dir);
extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 7ccb228..2c0f867 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -75,14 +75,16 @@ machvec_timer_interrupt (int irq, void *dev_id)
EXPORT_SYMBOL(machvec_timer_interrupt);
void
-machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
+machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction dir)
{
mb();
}
EXPORT_SYMBOL(machvec_dma_sync_single);
void
-machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
+machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n,
+ enum dma_data_direction dir)
{
mb();
}
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 8a924a5..c8c2b04 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -60,7 +60,8 @@ sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int t
}
static void
-sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr,
+ enum dma_data_direction direction)
{
return;
}
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 53ebb64..87e903d 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -174,7 +174,8 @@ EXPORT_SYMBOL(sn_dma_free_coherent);
* figure out how to save dmamap handle so can use two step.
*/
dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr,
- size_t size, int direction,
+ size_t size,
+ enum dma_data_direction direction,
struct dma_attrs *attrs)
{
dma_addr_t dma_addr;
@@ -216,7 +217,8 @@ EXPORT_SYMBOL(sn_dma_map_single_attrs);
* coherent, so we just need to free any ATEs associated with this mapping.
*/
void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction,
+ size_t size,
+ enum dma_data_direction direction,
struct dma_attrs *attrs)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -239,7 +241,7 @@ EXPORT_SYMBOL(sn_dma_unmap_single_attrs);
* Unmap a set of streaming mode DMA translations.
*/
void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nhwentries, int direction,
+ int nhwentries, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
int i;
@@ -273,7 +275,8 @@ EXPORT_SYMBOL(sn_dma_unmap_sg_attrs);
* Maps each entry of @sg for DMA.
*/
int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nhwentries, int direction, struct dma_attrs *attrs)
+ int nhwentries, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
unsigned long phys_addr;
struct scatterlist *saved_sg = sgl, *sg;
@@ -323,28 +326,30 @@ int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
EXPORT_SYMBOL(sn_dma_map_sg_attrs);
void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
- size_t size, int direction)
+ size_t size,
+ enum dma_data_direction direction)
{
BUG_ON(dev->bus != &pci_bus_type);
}
EXPORT_SYMBOL(sn_dma_sync_single_for_cpu);
void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
- size_t size, int direction)
+ size_t size,
+ enum dma_data_direction direction)
{
BUG_ON(dev->bus != &pci_bus_type);
}
EXPORT_SYMBOL(sn_dma_sync_single_for_device);
void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, int direction)
+ int nelems, enum dma_data_direction direction)
{
BUG_ON(dev->bus != &pci_bus_type);
}
EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu);
void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, int direction)
+ int nelems, enum dma_data_direction direction)
{
BUG_ON(dev->bus != &pci_bus_type);
}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index e626e50..c87d57e 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -205,7 +205,8 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
* DMA mappings for Direct 64 and 32 do not have any DMA maps.
*/
void
-pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
+pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle,
+ enum dma_data_direction direction)
{
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
struct pcibus_info *pcibus_info =
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 7916512..64b46f3 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -467,7 +467,8 @@ map_return:
* resources to release.
*/
static void
-tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr,
+ enum dma_data_direction dir)
{
int i, entry;
struct tioca_common *tioca_common;
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 40d155d..84c7e77 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -25,32 +25,36 @@ struct dma_mapping_ops {
void (*free_coherent)(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
- size_t size, int direction);
+ size_t size,
+ enum dma_data_direction direction);
void (*unmap_single)(struct device *dev, dma_addr_t addr,
- size_t size, int direction);
+ size_t size,
+ enum dma_data_direction direction);
void (*sync_single_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
- int direction);
+ enum dma_data_direction direction);
void (*sync_single_for_device)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
- int direction);
+ enum dma_data_direction direction);
void (*sync_single_range_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
- size_t size, int direction);
+ size_t size,
+ enum dma_data_direction direction);
void (*sync_single_range_for_device)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
- size_t size, int direction);
+ size_t size,
+ enum dma_data_direction direction);
void (*sync_sg_for_cpu)(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int direction);
+ enum dma_data_direction direction);
void (*sync_sg_for_device)(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int direction);
+ enum dma_data_direction direction);
int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
void (*unmap_sg)(struct device *hwdev,
struct scatterlist *sg, int nents,
- int direction);
+ enum dma_data_direction direction);
int (*dma_supported)(struct device *hwdev, u64 mask);
int is_phys;
};
@@ -91,7 +95,7 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
static inline dma_addr_t
dma_map_single(struct device *hwdev, void *ptr, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -102,7 +106,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
static inline void
dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(dev);
@@ -113,7 +117,7 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
static inline int
dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction)
+ int nents, enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -123,7 +127,7 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg,
static inline void
dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
- int direction)
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -134,7 +138,7 @@ dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
static inline void
dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
- size_t size, int direction)
+ size_t size, enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -146,7 +150,7 @@ dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
static inline void
dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
- size_t size, int direction)
+ size_t size, enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -158,7 +162,8 @@ dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
static inline void
dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
- unsigned long offset, size_t size, int direction)
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -172,7 +177,7 @@ dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
static inline void
dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
unsigned long offset, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -185,7 +190,7 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
static inline void
dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, int direction)
+ int nelems, enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -197,7 +202,7 @@ dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
static inline void
dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, int direction)
+ int nelems, enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
@@ -210,7 +215,7 @@ dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
struct dma_mapping_ops *ops = get_dma_ops(dev);
@@ -220,7 +225,8 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
}
static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
- size_t size, int direction)
+ size_t size,
+ enum dma_data_direction direction)
{
dma_unmap_single(dev, addr, size, direction);
}
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 51fb2c7..c51b25e 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -6,35 +6,39 @@
/* SWIOTLB interface */
extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
- size_t size, int dir);
+ size_t size, enum dma_data_direction dir);
extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size, enum dma_data_direction dir);
extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_for_device(struct device *hwdev,
dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
- size_t size, int dir);
+ size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int dir);
+ enum dma_data_direction dir);
extern void swiotlb_sync_sg_for_device(struct device *hwdev,
struct scatterlist *sg, int nelems,
- int dir);
+ enum dma_data_direction dir);
extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction);
+ int nents, enum dma_data_direction direction);
extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
index 7a6677c..644b60a 100644
--- a/arch/x86/include/asm/tce.h
+++ b/arch/x86/include/asm/tce.h
@@ -39,7 +39,8 @@ struct iommu_table;
#define TCE_RPN_MASK 0x0000fffffffff000ULL
extern void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction);
+ unsigned int npages, unsigned long uaddr,
+ enum dma_data_direction direction);
extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
extern void * __init alloc_tce_table(void);
extern void __init free_tce_table(void *tbl);
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ce948aa..13ec5d8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -872,7 +872,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address,
phys_addr_t paddr,
- int direction)
+ enum dma_data_direction direction)
{
u64 *pte, __pte;
@@ -932,7 +932,7 @@ static dma_addr_t __map_single(struct device *dev,
struct dma_ops_domain *dma_dom,
phys_addr_t paddr,
size_t size,
- int dir,
+ enum dma_data_direction dir,
bool align,
u64 dma_mask)
{
@@ -979,7 +979,7 @@ static void __unmap_single(struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
dma_addr_t dma_addr,
size_t size,
- int dir)
+ enum dma_data_direction dir)
{
dma_addr_t i, start;
unsigned int pages;
@@ -1008,7 +1008,7 @@ static void __unmap_single(struct amd_iommu *iommu,
* The exported map_single function for dma_ops.
*/
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
unsigned long flags;
struct amd_iommu *iommu;
@@ -1046,7 +1046,7 @@ out:
* The exported unmap_single function for dma_ops.
*/
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
unsigned long flags;
struct amd_iommu *iommu;
@@ -1072,7 +1072,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
* device which is not handled by an AMD IOMMU in the system.
*/
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
struct scatterlist *s;
int i;
@@ -1090,7 +1090,7 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
* lists).
*/
static int map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
unsigned long flags;
struct amd_iommu *iommu;
@@ -1152,7 +1152,7 @@ unmap:
* lists).
*/
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
unsigned long flags;
struct amd_iommu *iommu;
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d28bbdc..76a55c0 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -297,7 +297,8 @@ static unsigned long iommu_range_alloc(struct device *dev,
}
static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
- void *vaddr, unsigned int npages, int direction)
+ void *vaddr, unsigned int npages,
+ enum dma_data_direction direction)
{
unsigned long entry;
dma_addr_t ret = bad_dma_address;
@@ -381,7 +382,7 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
}
static void calgary_unmap_sg(struct device *dev,
- struct scatterlist *sglist, int nelems, int direction)
+ struct scatterlist *sglist, int nelems, enum dma_data_direction dir)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -404,7 +405,7 @@ static void calgary_unmap_sg(struct device *dev,
}
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
- int nelems, int direction)
+ int nelems, enum dma_data_direction direction)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -446,7 +447,7 @@ error:
}
static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
- size_t size, int direction)
+ size_t size, enum dma_data_direction direction)
{
void *vaddr = phys_to_virt(paddr);
unsigned long uaddr;
@@ -460,7 +461,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
}
static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
- size_t size, int direction)
+ size_t size, enum dma_data_direction direction)
{
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba7ad83..f3cd99b 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -188,7 +188,8 @@ static void dump_leak(void)
# define CLEAR_LEAK(x)
#endif
-static void iommu_full(struct device *dev, size_t size, int dir)
+static void
+iommu_full(struct device *dev, size_t size, enum dma_data_direction dir)
{
/*
* Ran out of IOMMU space for this operation. This is very bad.
@@ -231,7 +232,8 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
* Caller needs to check if the iommu is needed and flush.
*/
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- size_t size, int dir, unsigned long align_mask)
+ size_t size, enum dma_data_direction dir,
+ unsigned long align_mask)
{
unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
@@ -256,7 +258,8 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
/* Map a single area into the IOMMU */
static dma_addr_t
-gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
+gart_map_single(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long bus;
@@ -276,7 +279,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
* Free a DMA mapping.
*/
static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction)
+ size_t size, enum dma_data_direction direction)
{
unsigned long iommu_page;
int npages;
@@ -299,7 +302,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
* Wrapper for pci_unmap_single working with scatterlists.
*/
static void
-gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
{
struct scatterlist *s;
int i;
@@ -313,7 +317,7 @@ gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
- int nents, int dir)
+ int nents, enum dma_data_direction dir)
{
struct scatterlist *s;
int i;
@@ -401,7 +405,8 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
* Merge chunks that have page aligned sizes into a continuous mapping.
*/
static int
-gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
{
struct scatterlist *s, *ps, *start_sg, *sgmap;
int need = 0, nextneed, i, out, start;
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index c70ab5a..a4c6b7f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -27,7 +27,7 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
static dma_addr_t
nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
dma_addr_t bus = paddr;
WARN_ON(size == 0);
@@ -54,7 +54,7 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
* the same here.
*/
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, int direction)
+ int nents, enum dma_data_direction direction)
{
struct scatterlist *s;
int i;
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d1..c400a0b 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -13,7 +13,7 @@ int swiotlb __read_mostly;
static dma_addr_t
swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
- int direction)
+ enum dma_data_direction direction)
{
return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
}
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 9e540fe..29bc5b6 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -46,7 +46,8 @@ static inline void flush_tce(void* tceaddr)
}
void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction)
+ unsigned int npages, unsigned long uaddr,
+ enum dma_data_direction direction)
{
u64* tp;
u64 t;
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 213a5c8..0232293 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1997,7 +1997,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
- size_t size, int dir, u64 dma_mask)
+ size_t size,
+ enum dma_data_direction dir, u64 dma_mask)
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -2059,7 +2060,7 @@ error:
}
dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
return __intel_map_single(hwdev, paddr, size, dir,
to_pci_dev(hwdev)->dma_mask);
@@ -2124,7 +2125,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
}
void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
- int dir)
+ enum dma_data_direction dir)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct dmar_domain *domain;
@@ -2204,7 +2205,7 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
@@ -2244,7 +2245,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
}
static int intel_nontranslate_map_sg(struct device *hddev,
- struct scatterlist *sglist, int nelems, int dir)
+ struct scatterlist *sglist, int nelems, enum dma_data_direction dir)
{
int i;
struct scatterlist *sg;
@@ -2258,7 +2259,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
}
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
void *addr;
int i;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1bff7bf..eba544e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -350,9 +350,13 @@ static inline int intel_iommu_found(void)
extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
-extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int);
-extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int);
-extern int intel_map_sg(struct device *, struct scatterlist *, int, int);
-extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int);
+extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t,
+ enum dma_data_direction);
+extern void intel_unmap_single(struct device *, dma_addr_t, size_t,
+ enum dma_data_direction);
+extern int intel_map_sg(struct device *, struct scatterlist *, int,
+ enum dma_data_direction);
+extern void intel_unmap_sg(struct device *, struct scatterlist *, int,
+ enum dma_data_direction);
#endif
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b18ec55..f4d1be3 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -19,60 +19,65 @@ swiotlb_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
extern dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir);
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
+ enum dma_data_direction dir);
extern void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size, enum dma_data_direction dir);
extern dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
- int dir, struct dma_attrs *attrs);
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs);
extern void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, struct dma_attrs *attrs);
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs);
extern int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
- int direction);
+ enum dma_data_direction direction);
extern void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
- int direction);
+ enum dma_data_direction direction);
extern int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir, struct dma_attrs *attrs);
+ enum dma_data_direction dir, struct dma_attrs *attrs);
extern void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, struct dma_attrs *attrs);
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs);
extern void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size, enum dma_data_direction dir);
extern void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir);
+ int nelems, enum dma_data_direction dir);
extern void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir);
+ size_t size, enum dma_data_direction dir);
extern void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir);
+ int nelems, enum dma_data_direction dir);
extern void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir);
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir);
extern void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
unsigned long offset, size_t size,
- int dir);
+ enum dma_data_direction dir);
extern int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ab5d3d7..fc906d5 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -350,7 +350,8 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
* Allocates bounce buffer and returns its kernel virtual address.
*/
static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
char *dma_addr;
@@ -452,7 +453,8 @@ found:
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -493,7 +495,7 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
- int dir, int target)
+ enum dma_data_direction dir, int target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
@@ -583,7 +585,8 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
}
static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+ int do_panic)
{
/*
* Ran out of IOMMU space for this operation. This is very bad.
@@ -612,7 +615,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
*/
dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
dma_addr_t dev_addr = virt_to_dma_addr(hwdev, ptr);
void *map;
@@ -649,7 +652,8 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
EXPORT_SYMBOL(swiotlb_map_single_attrs);
dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
+ enum dma_data_direction dir)
{
return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL);
}
@@ -664,7 +668,8 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
*/
void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, struct dma_attrs *attrs)
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
@@ -678,7 +683,7 @@ EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
}
@@ -694,7 +699,7 @@ swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
*/
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, int target)
+ size_t size, enum dma_data_direction dir, int target)
{
char *dma_addr = dma_addr_to_virt(hwdev, dev_addr);
@@ -707,14 +712,14 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
@@ -725,7 +730,7 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
static void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
unsigned long offset, size_t size,
- int dir, int target)
+ enum dma_data_direction dir, int target)
{
char *dma_addr = dma_addr_to_virt(hwdev, dev_addr) + offset;
@@ -738,7 +743,8 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
SYNC_FOR_CPU);
@@ -746,14 +752,15 @@ swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
SYNC_FOR_DEVICE);
}
-void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
- struct dma_attrs *);
+void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int,
+ enum dma_data_direction, struct dma_attrs *);
/*
* Map a set of buffers described by scatterlist in streaming mode for DMA.
* This is the scatter-gather version of the above swiotlb_map_single
@@ -772,7 +779,7 @@ void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
*/
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
dma_addr_t dev_addr;
@@ -806,7 +813,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -817,7 +824,8 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
*/
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, struct dma_attrs *attrs)
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -837,7 +845,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -851,7 +859,7 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
*/
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, int target)
+ int nelems, enum dma_data_direction dir, int target)
{
struct scatterlist *sg;
int i;
@@ -870,14 +878,14 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
--
1.5.6.5
On Dec 18, 2008, at 11:11 PM, Becky Bruce wrote:
> Ingo, Jeremy,
>
> For reference, here's the set of patches I've been testing on powerpc
> using swiotlb with HIGHMEM enabled. I think if we can deal with the
> issues I mentioned in my previous mail, we should be able to use the
> patch series Jeremy published on ppc - I'm just sticking this out here
> since I'm about to fall off the internet for a while. I'll look at
> getting your code working on ppc as soon as I'm back online.
>
> Cheers,
> Becky
There are in fact only 9 patches relevant to this list - the other 2
of the "11" are the powerpc-specific platform code and some debug
foo. Apologies for the erroneous numbering scheme.
-B
Becky Bruce wrote:
> I've taken a quick look at the series posted to the list, and they're
> actually very similar to what I've done. I think there are really
> only 3 fairly minor issues that need to be resolved to make this work
> on ppc:
>
> 1) We need to add swiotlb_map/unmap_page calls, as those are part the
> ppc dma_mapping_ops (in fact, we don't have map/unmap_single in the
> dma_mapping_ops struct on ppc anymore - those just call
> map/unmap_page). This is really just a matter of moving some code
> around and making some minor changes, as swiotlb_map/unmap_single can
> call swiotlb_map/unmap_page once the args have been converted.
> There's a patch in my series that should make this pretty obvious.
>
> 2) To convert any address to/from a bus address, we also need the
> hwdev pointer passed as an argument since ppc supports a per-device
> offset accessed via the device ptr that is used to calculate the bus
> address. I'd also given my conversion functions more generic names,
> as it seemed highly likely that these would eventually be useful
> outside of the swiotlb code.
>
> 3) powerpc uses enum dma_data_direction for the direction argument to
> its dma_ops, which is, from reading the kernel docs, the correct
> argument type for the DMA API functions. However, the iotlb/x86/ia64
> code is currently using an int. This causes a build warning when we
> initialize the dma_ops struct using the swiotlb funcs. Is there a
> reason for the use of "int" in x86/ia64? The Right Thing(TM) here
> seems to be to convert those over to using the enum, and I have a big
> patch that starts doing that, but I've probably missed some places. I
> could instead do some hackery on the ppc side, and leave the other
> platforms alone, but I'd prefer to do it cleanly. Thoughts?
>
> Unfortunately, this is horrible timing for me, as starting tomorrow,
> I'm going to be offline for a week and a half or so in podunk
> Louisiana with essentially no net access. I can integrate my code
> into your tree and test on PPC as soon as I return to the real world.
Yeah, I think that's OK. The important thing at this point is to
determine whether the two patch sets are aligned or conflicting. It
sounds like they're largely aligned, and so generating a delta from my
patches to match your needs will be relatively straightforward.
I'm trying to line all this Xen stuff up for this merge window, so I'd
prefer to revisit it in the next dev cycle. Did you want to get
something into this merge window?
J
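For point 1 above, here is a minimal, untested sketch of the rearrangement
being described: a page+offset entry point that the ppc dma_mapping_ops
.map_page hook could use directly, with the existing virtual-address entry
point reduced to a thin wrapper around it. The name swiotlb_map_page, the
phys_addr_t-based map_single() (from Becky's patch further down), the
address-check helper, and the omitted overflow/error handling are all
assumptions for illustration only, not code from either series.

/*
 * Hypothetical sketch, intended to live in lib/swiotlb.c next to
 * map_single().  Error/overflow handling omitted for brevity.
 */
dma_addr_t
swiotlb_map_page(struct device *hwdev, struct page *page,
		 unsigned long offset, size_t size,
		 enum dma_data_direction dir, struct dma_attrs *attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = swiotlb_phys_to_bus(phys);
	void *map;

	BUG_ON(dir == DMA_NONE);

	/*
	 * If the device can reach the buffer directly, no bouncing is
	 * needed.  (Point 2: on ppc the phys->bus conversion would also
	 * take hwdev so the per-device offset can be applied here.)
	 */
	if (!swiotlb_force && !address_needs_mapping(hwdev, dev_addr, size))
		return dev_addr;

	/* Otherwise bounce through the IO TLB */
	map = map_single(hwdev, phys, size, dir);
	return swiotlb_virt_to_bus(map);
}

dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
			 int dir, struct dma_attrs *attrs)
{
	/* A lowmem kernel pointer just splits into page + offset */
	return swiotlb_map_page(hwdev, virt_to_page(ptr),
				offset_in_page(ptr), size, dir, attrs);
}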
* FUJITA Tomonori <[email protected]> wrote:
> On Thu, 18 Dec 2008 12:17:54 -0600
> Becky Bruce <[email protected]> wrote:
>
> > > Can you be more specific? What architecture is plan to use highmem
> > > support in swiotlb?
> >
> > 32-bit powerpc needs this support - I was actually about to push a
> > similar set of patches. We have several processors that support 36
> > bits of physical address space and do not have any iommu capability.
> > The rest of the kernel support for those processors is now in place,
> > so swiotlb is the last piece of the puzzle for that to be fully
> > functional. I need to take a closer look at this series to see
> > exactly what it's doing and how it differs from what I've been testing.
>
> Ah, thanks,
>
> Then I accept that highmem support in lib/swiotlb.c is generically
> useful and we have to live with the ugly complication due to it.
Great! And we of course want to make the best possible job here - so
whenever you notice any area that could be done better and less
intrusively, please mention it.
Ingo
On Dec 19, 2008, at 1:02 AM, Jeremy Fitzhardinge wrote:
> Becky Bruce wrote:
>>
>>
>> Unfortunately, this is horrible timing for me, as starting
>> tomorrow, I'm going to be offline for a week and a half or so in
>> podunk Louisiana with essentially no net access. I can integrate
>> my code into your tree and test on PPC as soon as I return to the
>> real world.
>
> Yeah, I think that's OK. The important thing at this point is to
> determine whether the two patch sets are aligned or conflicting. It
> sounds like they're largely aligned, and so generating a delta from
> my patches to match your needs will be relatively straightforward.
>
> I'm trying to line all this Xen stuff up for this merge window, so
> I'd prefer to revisit it in the next dev cycle. Did you want to get
> something into this merge window?
I was originally hoping to get something into this merge window, but
the timing just isn't working out for me. The rest of the support is
already in the kernel for ppc (actually, most everything is in
2.6.28), and swiotlb is the last bit I need to get this working. Ah
well.... I'll hope to work on this from the ppc point of view as soon
as I get back online, and we'll see where that takes us.
Thanks!
-Becky
Becky Bruce wrote:
> When we enable swiotlb for platforms that support HIGHMEM, we
> can no longer store the virtual address of the original dma
> buffer, because that buffer might not have a permanent mapping.
> Change the iotlb code to instead store the physical address of
> the original buffer.
>
Hm, yes, I think using a phys_addr_t may end up being cleaner than using
struct page *+offset.
J
> Signed-off-by: Becky Bruce <[email protected]>
> ---
> lib/swiotlb.c | 47 ++++++++++++++++++++++++-----------------------
> 1 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index ed4f44a..e9d5bf6 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -118,7 +118,7 @@ static unsigned int io_tlb_index;
> * We need to save away the original address corresponding to a mapped entry
> * for the sync operations.
> */
> -static unsigned char **io_tlb_orig_addr;
> +static phys_addr_t *io_tlb_orig_addr;
>
> /*
> * Protect the above data structures in the map and unmap calls
> @@ -175,7 +175,7 @@ swiotlb_init_with_default_size(size_t default_size)
> for (i = 0; i < io_tlb_nslabs; i++)
> io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
> io_tlb_index = 0;
> - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
> + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
>
> /*
> * Get the overflow emergency buffer
> @@ -253,12 +253,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
> io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
> io_tlb_index = 0;
>
> - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
> - get_order(io_tlb_nslabs * sizeof(char *)));
> + io_tlb_orig_addr = (phys_addr_t *)
> + __get_free_pages(GFP_KERNEL,
> + get_order(io_tlb_nslabs *
> + sizeof(phys_addr_t)));
> if (!io_tlb_orig_addr)
> goto cleanup3;
>
> - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
> + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
>
> /*
> * Get the overflow emergency buffer
> @@ -276,8 +278,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
> return 0;
>
> cleanup4:
> - free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
> - sizeof(char *)));
> + free_pages((unsigned long)io_tlb_orig_addr,
> + get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
> io_tlb_orig_addr = NULL;
> cleanup3:
> free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
> @@ -307,7 +309,7 @@ static int is_swiotlb_buffer(char *addr)
> * Allocates bounce buffer and returns its kernel virtual address.
> */
> static void *
> -map_single(struct device *hwdev, char *buffer, size_t size, int dir)
> +map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
> {
> unsigned long flags;
> char *dma_addr;
> @@ -398,9 +400,9 @@ found:
> * needed.
> */
> for (i = 0; i < nslots; i++)
> - io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
> + io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
> if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
> - memcpy(dma_addr, buffer, size);
> + memcpy(dma_addr, phys_to_virt(phys), size);
>
> return dma_addr;
> }
> @@ -414,17 +416,17 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
> unsigned long flags;
> int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
> int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
> - char *buffer = io_tlb_orig_addr[index];
> + phys_addr_t phys = io_tlb_orig_addr[index];
>
> /*
> * First, sync the memory before unmapping the entry
> */
> - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
> + if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
> /*
> * bounce... copy the data back into the original buffer * and
> * delete the bounce buffer.
> */
> - memcpy(buffer, dma_addr, size);
> + memcpy(phys_to_virt(phys), dma_addr, size);
>
> /*
> * Return the buffer to the free list by setting the corresponding
> @@ -457,20 +459,20 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
> int dir, int target)
> {
> int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
> - char *buffer = io_tlb_orig_addr[index];
> + phys_addr_t phys = io_tlb_orig_addr[index];
>
> - buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
> + phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
>
> switch (target) {
> case SYNC_FOR_CPU:
> if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
> - memcpy(buffer, dma_addr, size);
> + memcpy(phys_to_virt(phys), dma_addr, size);
> else
> BUG_ON(dir != DMA_TO_DEVICE);
> break;
> case SYNC_FOR_DEVICE:
> if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
> - memcpy(dma_addr, buffer, size);
> + memcpy(dma_addr, phys_to_virt(phys), size);
> else
> BUG_ON(dir != DMA_FROM_DEVICE);
> break;
> @@ -509,7 +511,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> * swiotlb_map_single(), which will grab memory from
> * the lowest available address range.
> */
> - ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
> + ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
> if (!ret)
> return NULL;
> }
> @@ -591,7 +593,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
> /*
> * Oh well, have to allocate and map a bounce buffer.
> */
> - map = map_single(hwdev, ptr, size, dir);
> + map = map_single(hwdev, virt_to_phys(ptr), size, dir);
> if (!map) {
> swiotlb_full(hwdev, size, dir, 1);
> map = io_tlb_overflow_buffer;
> @@ -736,18 +738,17 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
> int dir, struct dma_attrs *attrs)
> {
> struct scatterlist *sg;
> - void *addr;
> dma_addr_t dev_addr;
> int i;
>
> BUG_ON(dir == DMA_NONE);
>
> for_each_sg(sgl, sg, nelems, i) {
> - addr = sg_virt(sg);
> - dev_addr = virt_to_dma_addr(hwdev, addr);
> + dev_addr = SG_ENT_BUS_ADDRESS(hwdev, sg);
> if (swiotlb_force ||
> swiotlb_addr_needs_mapping(hwdev, dev_addr, sg->length)) {
> - void *map = map_single(hwdev, addr, sg->length, dir);
> + void *map = map_single(hwdev, sg_phys(sg), sg->length,
> + dir);
> if (!map) {
> /* Don't panic here, we expect map_sg users
> to do proper error handling. */
>
Becky Bruce wrote:
> On highmem systems, the original dma buffer might not
> have a virtual mapping - we need to kmap it in to perform
> the bounce. Extract the code that does the actual
> copy into a function that does the kmap if highmem
> is enabled, and defaults to the normal swiotlb memcpy
> if not.
>
> Signed-off-by: Becky Bruce <[email protected]>
> ---
> lib/swiotlb.c | 53 +++++++++++++++++++++++++++++++++++++++++++++--------
> 1 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index e9d5bf6..ab5d3d7 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -14,6 +14,7 @@
> * 04/07/.. ak Better overflow handling. Assorted fixes.
> * 05/09/10 linville Add support for syncing ranges, support syncing for
> * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
> + * 08/12/11 beckyb Add highmem support
> */
>
> #include <linux/cache.h>
> @@ -24,6 +25,7 @@
> #include <linux/string.h>
> #include <linux/types.h>
> #include <linux/ctype.h>
> +#include <linux/highmem.h>
>
> #include <asm/io.h>
> #include <asm/dma.h>
> @@ -306,6 +308,45 @@ static int is_swiotlb_buffer(char *addr)
> }
>
> /*
> + * Bounce: copy the swiotlb buffer back to the original dma location
> + */
> +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
> + enum dma_data_direction dir)
> +{
> +#ifdef CONFIG_HIGHMEM
>
Rather than using #ifdef CONFIG_HIGHMEM, I think it would be better to
use something like:
if (PageHighMem(phys_to_page(phys))) {
/* handle high page */
} else {
/* simple path */
}
This will still exclude all the highmem code on non-highmem systems, and
will use the fast-path for lowmem pages too.
> + /* The buffer may not have a mapping. Map it in and copy */
> + unsigned int offset = ((unsigned long)phys &
> + ((1 << PAGE_SHIFT) - 1));
> + char *buffer;
> + unsigned int sz = 0;
> + unsigned long flags;
> +
> + while (size) {
> + sz = ((PAGE_SIZE - offset) > size) ? size :
> + PAGE_SIZE - offset;
>
sz = min(size, PAGE_SIZE - offset)?
> + local_irq_save(flags);
> + buffer = kmap_atomic(pfn_to_page(phys >> PAGE_SHIFT),
>
phys_to_page() does this for you.
> + KM_BOUNCE_READ);
> + if (dir == DMA_TO_DEVICE)
> + memcpy(dma_addr, buffer + offset, sz);
> + else
> + memcpy(buffer + offset, dma_addr, sz);
> + kunmap_atomic(buffer, KM_BOUNCE_READ);
> + local_irq_restore(flags);
> + size -= sz;
> + phys += sz;
> + dma_addr += sz;
> + offset = 0;
> + }
> +#else
> + if (dir == DMA_TO_DEVICE)
> + memcpy(dma_addr, phys_to_virt(phys), size);
> + else
> + memcpy(phys_to_virt(phys), dma_addr, size);
> +#endif
> +}
>
J
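For reference, here is an untested sketch of the bounce helper rewritten
around the runtime PageHighMem() test suggested above (also folding in the
min() and pfn_to_page() remarks). This is an illustration of the suggestion,
not code from either posted series; on !CONFIG_HIGHMEM builds PageHighMem()
evaluates to 0 at compile time, so only the lowmem memcpy path survives in
the generated code.

/* Assumes the <linux/highmem.h> include added by the patch above. */
static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
			   enum dma_data_direction dir)
{
	unsigned long pfn = phys >> PAGE_SHIFT;

	if (PageHighMem(pfn_to_page(pfn))) {
		/* The buffer may not have a kernel mapping: kmap piecewise */
		unsigned int offset = phys & ~PAGE_MASK;

		while (size) {
			size_t sz = min_t(size_t, size, PAGE_SIZE - offset);
			unsigned long flags;
			char *buffer;

			local_irq_save(flags);	/* protects KM_BOUNCE_READ */
			buffer = kmap_atomic(pfn_to_page(pfn), KM_BOUNCE_READ);
			if (dir == DMA_TO_DEVICE)
				memcpy(dma_addr, buffer + offset, sz);
			else
				memcpy(buffer + offset, dma_addr, sz);
			kunmap_atomic(buffer, KM_BOUNCE_READ);
			local_irq_restore(flags);

			size -= sz;
			dma_addr += sz;
			offset = 0;
			pfn++;
		}
	} else {
		/* Lowmem fast path: the linear mapping is always present */
		if (dir == DMA_TO_DEVICE)
			memcpy(dma_addr, phys_to_virt(phys), size);
		else
			memcpy(phys_to_virt(phys), dma_addr, size);
	}
}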
Becky Bruce wrote:
>
> On Dec 19, 2008, at 1:02 AM, Jeremy Fitzhardinge wrote:
>
>> Becky Bruce wrote:
>>>
>>>
>>> Unfortunately, this is horrible timing for me, as starting tomorrow,
>>> I'm going to be offline for a week and a half or so in podunk
>>> Louisiana with essentially no net access. I can integrate my code
>>> into your tree and test on PPC as soon as I return to the real world.
>>
>> Yeah, I think that's OK. The important thing at this point is to
>> determine whether the two patch sets are aligned or conflicting. It
>> sounds like they're largely aligned, and so generating a delta from
>> my patches to match your needs will be relatively straightforward.
>>
>> I'm trying to line all this Xen stuff up for this merge window, so
>> I'd prefer to revisit it in the next dev cycle. Did you want to get
>> something into this merge window?
>
> I was originally hoping to get something into this merge window, but
> the timing just isn't working out for me. The rest of the support is
> already in the kernel for ppc (actually, most everything is in
> 2.6.28), and swiotlb is the last bit I need to get this working. Ah
> well.... I'll hope to work on this from the ppc point of view as soon
> as I get back online, and we'll see where that takes us.
Hm, yes, they're really very similar patch sets. We can definitely turn
this into something that will work for both.
J
On Dec 19, 2008, at 11:46 AM, Jeremy Fitzhardinge wrote:
> Becky Bruce wrote:
>> On highmem systems, the original dma buffer might not
>> have a virtual mapping - we need to kmap it in to perform
>> the bounce. Extract the code that does the actual
>> copy into a function that does the kmap if highmem
>> is enabled, and defaults to the normal swiotlb memcpy
>> if not.
>>
>> Signed-off-by: Becky Bruce <[email protected]>
>> ---
>> lib/swiotlb.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
>> +--------
>> 1 files changed, 45 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
>> index e9d5bf6..ab5d3d7 100644
>> --- a/lib/swiotlb.c
>> +++ b/lib/swiotlb.c
>> @@ -14,6 +14,7 @@
>> * 04/07/.. ak Better overflow handling. Assorted fixes.
>> * 05/09/10 linville Add support for syncing ranges, support
>> syncing for
>> * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
>> + * 08/12/11 beckyb Add highmem support
>> */
>> #include <linux/cache.h>
>> @@ -24,6 +25,7 @@
>> #include <linux/string.h>
>> #include <linux/types.h>
>> #include <linux/ctype.h>
>> +#include <linux/highmem.h>
>> #include <asm/io.h>
>> #include <asm/dma.h>
>> @@ -306,6 +308,45 @@ static int is_swiotlb_buffer(char *addr)
>> }
>> /*
>> + * Bounce: copy the swiotlb buffer back to the original dma location
>> + */
>> +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
>> + enum dma_data_direction dir)
>> +{
>> +#ifdef CONFIG_HIGHMEM
>>
>
> Rather than using #ifdef CONFIG_HIGHMEM, I think it would be better
> to use something like:
>
> if (PageHighMem(phys_to_page(phys))) {
> /* handle high page */
> } else {
> /* simple path */
> }
>
> This will still exclude all the highmem code on non-highmem systems,
> and will use the fast-path for lowmem pages too.
Right - in the ppc case, I don't ever need to bounce a lowmem page, so
I wasn't taking that into account. But what you suggest makes sense.
>
>
>> + /* The buffer may not have a mapping. Map it in and copy */
>> + unsigned int offset = ((unsigned long)phys &
>> + ((1 << PAGE_SHIFT) - 1));
>> + char *buffer;
>> + unsigned int sz = 0;
>> + unsigned long flags;
>> +
>> + while (size) {
>> + sz = ((PAGE_SIZE - offset) > size) ? size :
>> + PAGE_SIZE - offset;
>>
> sz = min(size, PAGE_SIZE - offset)?
>> + local_irq_save(flags);
>> + buffer = kmap_atomic(pfn_to_page(phys >> PAGE_SHIFT),
>>
> phys_to_page() does this for you.
Yeah, it still needs a bit of clean-up... I need to scrub for stuff
like this. That loop changed a few times, and I hacked it into working
shape but haven't cleaned it up yet.
Thanks for taking a look! Given how similar the two patches are, it's
pretty clear that we're either both insane, or both headed in the
right direction :)
Anyway, I'm about to turn into a pumpkin - I'll probably be online for
another hour or so sometime this afternoon, and then that's it for me
until after I get back from vacation. My apologies again for
disappearing in the middle of the fun.
Cheers,
-B
On Tue, 16 Dec 2008 12:17:31 -0800
Jeremy Fitzhardinge <[email protected]> wrote:
> From: Ian Campbell <[email protected]>
>
> Some architectures require special rules to determine whether a range
> needs mapping or not. This adds a weak function for architectures to
> override.
>
> Signed-off-by: Ian Campbell <[email protected]>
> Signed-off-by: Jeremy Fitzhardinge <[email protected]>
> ---
> include/linux/swiotlb.h | 2 ++
> lib/swiotlb.c | 15 +++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -30,6 +30,8 @@
> extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
> extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
>
> +extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
> +
> extern void
> *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> dma_addr_t *dma_handle, gfp_t flags);
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -145,6 +145,11 @@
> return phys_to_virt(swiotlb_bus_to_phys(address));
> }
>
> +int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
> +{
> + return 0;
> +}
Can't you simply make address_needs_mapping __weak as Becky does?
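A sketch of that alternative, for comparison (an assumption, not part of the
posted patches): drop the "static" from address_needs_mapping() and mark it
__weak, so an architecture can override the whole policy instead of layering
a second swiotlb_arch_range_needs_mapping() hook on top. The body below
mirrors the existing mask check in lib/swiotlb.c.

int __weak
address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
{
	/* default: bounce only if the buffer is outside the device's mask */
	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
}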
On Fri, 19 Dec 2008 09:39:58 -0800
Jeremy Fitzhardinge <[email protected]> wrote:
> Becky Bruce wrote:
> > When we enable swiotlb for platforms that support HIGHMEM, we
> > can no longer store the virtual address of the original dma
> > buffer, because that buffer might not have a permanent mapping.
> > Change the iotlb code to instead store the physical address of
> > the original buffer.
> >
>
> Hm, yes, I think using a phys_addr_t may end up being cleaner than using
> struct page *+offset.
Surely, cleaner and simpler. Can you please rework [PATCH 07/11] in
that way?
On Tue, 16 Dec 2008 12:17:33 -0800
Jeremy Fitzhardinge <[email protected]> wrote:
> This requires us to treat DMA regions in terms of page+offset rather
> than virtual addressing since a HighMem page may not have a mapping.
>
> Signed-off-by: Ian Campbell <[email protected]>
> Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Hmm, swiotlb should be architecture-independent code.
Can you separate the following X86_32-only hack from swiotlb? I
don't think that X86_64 and IA64 want such a hack in the core swiotlb code.
> lib/swiotlb.c | 120 +++++++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 88 insertions(+), 32 deletions(-)
>
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -26,6 +26,7 @@
> #include <linux/swiotlb.h>
> #include <linux/types.h>
> #include <linux/ctype.h>
> +#include <linux/highmem.h>
>
> #include <asm/io.h>
> #include <asm/dma.h>
> @@ -38,9 +39,6 @@
> #define OFFSET(val,align) ((unsigned long) \
> ( (val) & ( (align) - 1)))
>
> -#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
> -#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
> -
> #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
>
> /*
> @@ -91,7 +89,10 @@
> * We need to save away the original address corresponding to a mapped entry
> * for the sync operations.
> */
> -static unsigned char **io_tlb_orig_addr;
> +static struct swiotlb_phys_addr {
> + struct page *page;
> + unsigned int offset;
> +} *io_tlb_orig_addr;
>
> /*
> * Protect the above data structures in the map and unmap calls
> @@ -150,6 +151,11 @@
> return 0;
> }
>
> +static dma_addr_t swiotlb_sg_to_bus(struct scatterlist *sg)
> +{
> + return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset);
> +}
> +
> /*
> * Statically reserve bounce buffer space and initialize bounce buffer data
> * structures for the software IO TLB used to implement the DMA API.
> @@ -183,7 +189,7 @@
> for (i = 0; i < io_tlb_nslabs; i++)
> io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
> io_tlb_index = 0;
> - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
> + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
>
> /*
> * Get the overflow emergency buffer
> @@ -258,12 +264,12 @@
> io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
> io_tlb_index = 0;
>
> - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
> - get_order(io_tlb_nslabs * sizeof(char *)));
> + io_tlb_orig_addr = (struct swiotlb_phys_addr *)__get_free_pages(GFP_KERNEL,
> + get_order(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr)));
> if (!io_tlb_orig_addr)
> goto cleanup3;
>
> - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
> + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
>
> /*
> * Get the overflow emergency buffer
> @@ -312,20 +318,59 @@
> return addr >= io_tlb_start && addr < io_tlb_end;
> }
>
> +static struct swiotlb_phys_addr swiotlb_bus_to_phys_addr(char *dma_addr)
> +{
> + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
> + struct swiotlb_phys_addr buffer = io_tlb_orig_addr[index];
> + buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
> + buffer.page += buffer.offset >> PAGE_SHIFT;
> + buffer.offset &= PAGE_SIZE - 1;
> + return buffer;
> +}
> +
> static void
> -__sync_single(char *buffer, char *dma_addr, size_t size, int dir)
> +__sync_single(struct swiotlb_phys_addr buffer, char *dma_addr, size_t size, int dir)
> {
> - if (dir == DMA_TO_DEVICE)
> - memcpy(dma_addr, buffer, size);
> - else
> - memcpy(buffer, dma_addr, size);
> + if (PageHighMem(buffer.page)) {
> + size_t len, bytes;
> + char *dev, *host, *kmp;
> +
> + len = size;
> + while (len != 0) {
> + unsigned long flags;
> +
> + bytes = len;
> + if ((bytes + buffer.offset) > PAGE_SIZE)
> + bytes = PAGE_SIZE - buffer.offset;
> + local_irq_save(flags); /* protects KM_BOUNCE_READ */
> + kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
> + dev = dma_addr + size - len;
> + host = kmp + buffer.offset;
> + if (dir == DMA_FROM_DEVICE)
> + memcpy(host, dev, bytes);
> + else
> + memcpy(dev, host, bytes);
> + kunmap_atomic(kmp, KM_BOUNCE_READ);
> + local_irq_restore(flags);
> + len -= bytes;
> + buffer.page++;
> + buffer.offset = 0;
> + }
> + } else {
> + void *v = page_address(buffer.page) + buffer.offset;
> +
> + if (dir == DMA_TO_DEVICE)
> + memcpy(dma_addr, v, size);
> + else
> + memcpy(v, dma_addr, size);
> + }
> }
>
> /*
> * Allocates bounce buffer and returns its kernel virtual address.
> */
> static void *
> -map_single(struct device *hwdev, char *buffer, size_t size, int dir)
> +map_single(struct device *hwdev, struct swiotlb_phys_addr buffer, size_t size, int dir)
> {
> unsigned long flags;
> char *dma_addr;
> @@ -335,6 +380,7 @@
> unsigned long mask;
> unsigned long offset_slots;
> unsigned long max_slots;
> + struct swiotlb_phys_addr slot_buf;
>
> mask = dma_get_seg_boundary(hwdev);
> start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
> @@ -419,8 +465,13 @@
> * This is needed when we sync the memory. Then we sync the buffer if
> * needed.
> */
> - for (i = 0; i < nslots; i++)
> - io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
> + slot_buf = buffer;
> + for (i = 0; i < nslots; i++) {
> + slot_buf.page += slot_buf.offset >> PAGE_SHIFT;
> + slot_buf.offset &= PAGE_SIZE - 1;
> + io_tlb_orig_addr[index+i] = slot_buf;
> + slot_buf.offset += 1 << IO_TLB_SHIFT;
> + }
> if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
> __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
>
> @@ -436,12 +487,12 @@
> unsigned long flags;
> int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
> int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
> - char *buffer = io_tlb_orig_addr[index];
> + struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
>
> /*
> * First, sync the memory before unmapping the entry
> */
> - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
> + if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
> /*
> * bounce... copy the data back into the original buffer * and
> * delete the bounce buffer.
> @@ -478,10 +529,7 @@
> sync_single(struct device *hwdev, char *dma_addr, size_t size,
> int dir, int target)
> {
> - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
> - char *buffer = io_tlb_orig_addr[index];
> -
> - buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
> + struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
>
> switch (target) {
> case SYNC_FOR_CPU:
> @@ -525,7 +573,10 @@
> * swiotlb_map_single(), which will grab memory from
> * the lowest available address range.
> */
> - ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
> + struct swiotlb_phys_addr buffer;
> + buffer.page = virt_to_page(NULL);
> + buffer.offset = 0;
> + ret = map_single(hwdev, buffer, size, DMA_FROM_DEVICE);
> if (!ret)
> return NULL;
> }
> @@ -593,6 +644,7 @@
> {
> dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
> void *map;
> + struct swiotlb_phys_addr buffer;
>
> BUG_ON(dir == DMA_NONE);
> /*
> @@ -607,7 +659,9 @@
> /*
> * Oh well, have to allocate and map a bounce buffer.
> */
> - map = map_single(hwdev, ptr, size, dir);
> + buffer.page = virt_to_page(ptr);
> + buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
> + map = map_single(hwdev, buffer, size, dir);
> if (!map) {
> swiotlb_full(hwdev, size, dir, 1);
> map = io_tlb_overflow_buffer;
> @@ -752,18 +806,20 @@
> int dir, struct dma_attrs *attrs)
> {
> struct scatterlist *sg;
> - void *addr;
> + struct swiotlb_phys_addr buffer;
> dma_addr_t dev_addr;
> int i;
>
> BUG_ON(dir == DMA_NONE);
>
> for_each_sg(sgl, sg, nelems, i) {
> - addr = SG_ENT_VIRT_ADDRESS(sg);
> - dev_addr = swiotlb_virt_to_bus(addr);
> + dev_addr = swiotlb_sg_to_bus(sg);
> if (range_needs_mapping(sg_virt(sg), sg->length) ||
> address_needs_mapping(hwdev, dev_addr, sg->length)) {
> - void *map = map_single(hwdev, addr, sg->length, dir);
> + void *map;
> + buffer.page = sg_page(sg);
> + buffer.offset = sg->offset;
> + map = map_single(hwdev, buffer, sg->length, dir);
> if (!map) {
> /* Don't panic here, we expect map_sg users
> to do proper error handling. */
> @@ -803,11 +859,11 @@
> BUG_ON(dir == DMA_NONE);
>
> for_each_sg(sgl, sg, nelems, i) {
> - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
> + if (sg->dma_address != swiotlb_sg_to_bus(sg))
> unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
> sg->dma_length, dir);
> else if (dir == DMA_FROM_DEVICE)
> - dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
> + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
> }
> }
> EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
> @@ -836,11 +892,11 @@
> BUG_ON(dir == DMA_NONE);
>
> for_each_sg(sgl, sg, nelems, i) {
> - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
> + if (sg->dma_address != swiotlb_sg_to_bus(sg))
> sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
> sg->dma_length, dir, target);
> else if (dir == DMA_FROM_DEVICE)
> - dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
> + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
> }
> }
>
>
>
On Wed, 17 Dec 2008 11:48:41 +0900
FUJITA Tomonori <[email protected]> wrote:
> On Tue, 16 Dec 2008 12:17:28 -0800
> Jeremy Fitzhardinge <[email protected]> wrote:
>
> > Signed-off-by: Ian Campbell <[email protected]>
> > ---
> > lib/swiotlb.c | 10 +++-------
> > 1 file changed, 3 insertions(+), 7 deletions(-)
>
> This patch is wrong.
>
> dma_mask and coherent_dma_mask represent different restrictions.
>
> You can't use coherent_dma_mask for alloc_coherent.
Oops, "you can't use dma_mask for alloc_coherent."
On Tue, 16 Dec 2008 12:17:28 -0800
Jeremy Fitzhardinge <[email protected]> wrote:
> Signed-off-by: Ian Campbell <[email protected]>
> ---
> lib/swiotlb.c | 10 +++-------
> 1 file changed, 3 insertions(+), 7 deletions(-)
This patch is wrong.
dma_mask and coherent_dma_mask represent different restrictions.
You can't use coherent_dma_mask for alloc_coherent.
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -465,13 +465,9 @@
> dma_addr_t dev_addr;
> void *ret;
> int order = get_order(size);
> - u64 dma_mask = DMA_32BIT_MASK;
> -
> - if (hwdev && hwdev->coherent_dma_mask)
> - dma_mask = hwdev->coherent_dma_mask;
>
> ret = (void *)__get_free_pages(flags, order);
> - if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
> + if (ret && !address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
> /*
> * The allocated memory isn't reachable by the device.
> * Fall back on swiotlb_map_single().
> @@ -495,9 +491,9 @@
> dev_addr = virt_to_bus(ret);
>
> /* Confirm address can be DMA'd by device */
> - if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
> + if (!address_needs_mapping(hwdev, dev_addr, size)) {
> printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
> - (unsigned long long)dma_mask,
> + (unsigned long long)dma_get_mask(hwdev),
> (unsigned long long)dev_addr);
>
> /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
>
>
On Tue, 16 Dec 2008 21:35:13 +0100
Ingo Molnar <[email protected]> wrote:
>
> * Jeremy Fitzhardinge <[email protected]> wrote:
>
> > Hi Ingo,
> >
> > Here's some patches to clean up swiotlb to prepare for some Xen dom0
> > patches. These have been posted before, but undergone a round of
> > cleanups to address various comments.
>
> applied to tip/core/iommu, thanks Jeremy.
>
> the only patch that seems to have the potential to break drivers is:
>
> be4ac7b: swiotlb: consistently use address_needs_mapping everywhere
Yeah, as I already wrote, this patch is wrong.
I think that the whole patchset is against the swiotlb design. swiotlb
is designed to be used as a library. Each architecture implements its
own swiotlb by using the swiotlb library
(e.g. arch/x86/kernel/pci-swiotlb_64.c).
For example, adding the following code (9/14) to the library, when only
Xen needs it and the majority of swiotlb users (x86_64 and IA64) don't,
is against the design.
Can we have something like pci-swiotlb_xen.c (with some modifications
to the swiotlb library code)?
+__sync_single(struct swiotlb_phys_addr buffer, char *dma_addr, size_t size, int dir)
{
- if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, buffer, size);
- else
- memcpy(buffer, dma_addr, size);
+ if (PageHighMem(buffer.page)) {
+ size_t len, bytes;
+ char *dev, *host, *kmp;
+
+ len = size;
+ while (len != 0) {
+ unsigned long flags;
+
+ bytes = len;
+ if ((bytes + buffer.offset) > PAGE_SIZE)
+ bytes = PAGE_SIZE - buffer.offset;
+ local_irq_save(flags); /* protects KM_BOUNCE_READ */
+ kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
+ dev = dma_addr + size - len;
+ host = kmp + buffer.offset;
+ if (dir == DMA_FROM_DEVICE)
+ memcpy(host, dev, bytes);
+ else
+ memcpy(dev, host, bytes);
+ kunmap_atomic(kmp, KM_BOUNCE_READ);
+ local_irq_restore(flags);
+ len -= bytes;
+ buffer.page++;
+ buffer.offset = 0;
+ }
+ } else {
+ void *v = page_address(buffer.page) + buffer.offset;
+
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, v, size);
+ else
+ memcpy(v, dma_addr, size);
+ }
}
>I think that the whole patchset is against the swiotlb design. swiotlb
>is designed to be used as a library. Each architecture implements the
>own swiotlb by using swiotlb library
>(e.g. arch/x86/kernel/pci-swiotlb_64.c).
If it is a library, then it should be prepared to serve all possible users.
>For example, adding the following code (9/14) for just Xen that the
>majority of swiotbl users (x86_64 and IA64) don't need to the library
>is against the design.
"Don't" in terms of "currently don't": Once x86-64 wants to support more
than 46 physical address bits, it's not impossible that this would lead to
CONFIG_HIGHMEM getting introduced there, and then it'll be helpful that the
code is already prepared to deal with that case.
After all, the code portion in question ought to compile to nothing if
!CONFIG_HIGHMEM.
Jan
FUJITA Tomonori wrote:
> On Tue, 16 Dec 2008 21:35:13 +0100
> Ingo Molnar <[email protected]> wrote:
>
>
>> * Jeremy Fitzhardinge <[email protected]> wrote:
>>
>>
>>> Hi Ingo,
>>>
>>> Here's some patches to clean up swiotlb to prepare for some Xen dom0
>>> patches. These have been posted before, but undergone a round of
>>> cleanups to address various comments.
>>>
>> applied to tip/core/iommu, thanks Jeremy.
>>
>> the only patch that seems to have the potential to break drivers is:
>>
>> be4ac7b: swiotlb: consistently use address_needs_mapping everywhere
>>
>
> Yeah, as I already wrote, this patch is wrong.
>
I'll have a look.
> I think that the whole patchset is against the swiotlb design. swiotlb
> is designed to be used as a library. Each architecture implements the
> own swiotlb by using swiotlb library
> (e.g. arch/x86/kernel/pci-swiotlb_64.c).
>
The whole patchset? The bulk of the changes to lib/swiotlb.c are
relatively minor to remove the unwarranted assumptions it is making in
the face of a new user. They will have no effect on other existing
users, including non-Xen x86 builds.
If you have specific objections we can discuss those, but I don't think
there's anything fundamentally wrong with making lib/swiotlb.c a bit
more generically useful.
> For example, adding the following code (9/14) for just Xen that the
> majority of swiotbl users (x86_64 and IA64) don't need to the library
> is against the design.
>
If the architecture doesn't support highmem then this code will compile
to nothing - PageHighMem() will always evaluate to 0. It will therefore
have zero effect on the code generated for IA64 or x86-64. This is not
really a Xen-specific change, but a result of adding swiotlb support for
i386. Other architectures which support a notion of highmem would also
need this code if they wanted to use swiotlb.
J
FUJITA Tomonori wrote:
> On Tue, 16 Dec 2008 12:17:28 -0800
> Jeremy Fitzhardinge <[email protected]> wrote:
>
>
>> Signed-off-by: Ian Campbell <[email protected]>
>> ---
>> lib/swiotlb.c | 10 +++-------
>> 1 file changed, 3 insertions(+), 7 deletions(-)
>>
>
> This patch is wrong.
>
> dma_mask and coherent_dma_mask represent different restrictions.
>
> You can't use coherent_dma_mask for alloc_coherent.
>
OK. It looks like this patch is just an (erroneous) cleanup, so we can
drop it.
J
On Wed, 2008-12-17 at 11:51 +0900, FUJITA Tomonori wrote:
> On Wed, 17 Dec 2008 11:48:41 +0900
> FUJITA Tomonori <[email protected]> wrote:
>
> > On Tue, 16 Dec 2008 12:17:28 -0800
> > Jeremy Fitzhardinge <[email protected]> wrote:
> >
> > > Signed-off-by: Ian Campbell <[email protected]>
> > > ---
> > > lib/swiotlb.c | 10 +++-------
> > > 1 file changed, 3 insertions(+), 7 deletions(-)
> >
> > This patch is wrong.
> >
> > dma_mask and coherent_dma_mask represent different restrictions.
> >
> > You can't use coherent_dma_mask for alloc_coherent.
>
> Oops, "you can't use dma_mask for alloc_coherent."
You are right, I completely overlooked that it was coherent_dma_mask in
one place and just dma_mask in the other, sorry for the noise.
Ian.
On Wed, 17 Dec 2008 08:47:47 +0000
"Jan Beulich" <[email protected]> wrote:
> >I think that the whole patchset is against the swiotlb design. swiotlb
> >is designed to be used as a library. Each architecture implements the
> >own swiotlb by using swiotlb library
> >(e.g. arch/x86/kernel/pci-swiotlb_64.c).
>
> If it is a library, then it should be prepared to serve all possible users.
I'm not against changing the swiotlb library to make it usable for Xen.
> >For example, adding the following code (9/14) for just Xen that the
> >majority of swiotbl users (x86_64 and IA64) don't need to the library
> >is against the design.
>
> "Don't" in terms of "currently don't": Once x86-64 wants to support more
> than 46 physical address bits, it's not impossible that this would lead to
> CONFIG_HIGHMEM getting introduced there, and then it'll be helpful that the
> code is already prepared to deal with that case.
If you seriously think that adding highmem support to x86_64 would
happen, please take a look at:
http://lkml.org/lkml/2008/12/16/306
> After all, the code portion in question ought to compile to nothing if
> !CONFIG_HIGHMEM.
Adding such complication to the generic swiotlb code seriously hurts
its readability and maintainability. X86_64 and IA64 should not suffer
such damage.
On Wed, 17 Dec 2008 08:31:43 -0800
Jeremy Fitzhardinge <[email protected]> wrote:
> > I think that the whole patchset is against the swiotlb design. swiotlb
> > is designed to be used as a library. Each architecture implements the
> > own swiotlb by using swiotlb library
> > (e.g. arch/x86/kernel/pci-swiotlb_64.c).
> >
>
> The whole patchset? The bulk of the changes to lib/swiotlb.c are
> relatively minor to remove the unwarranted assumptions it is making in
> the face of a new user. They will have no effect on other existing
> users, including non-Xen x86 builds.
>
> If you have specific objections we can discuss those, but I don't think
> there's anything fundamentally wrong with making lib/swiotlb.c a bit
> more generically useful.
Sorry, but the highmem support is not generically useful.
I'm especially against the highmem support. As you said, the rest
looks fine but if you go with pci-swiotlb_32.c, I think that you don't
need most of them.
> > For example, adding the following code (9/14) for just Xen that the
> > majority of swiotbl users (x86_64 and IA64) don't need to the library
> > is against the design.
> >
>
> If the architecture doesn't support highmem then this code will compile
> to nothing - PageHighMem() will always evaluate to 0. It will therefore
I'm not talking about whether it will be compiled or not. As I wrote in
another mail, I'm talking about the maintainability and readability of
the common library.
> have zero effect on the code generated for IA64 or x86-64. This is not
> really a Xen-specific change, but a result of adding swiotlb support for
> i386. Other architectures which support a notion of highmem would also
> need this code if they wanted to use swiotlb.
Can you be more specific? What architecture is planning to use highmem
support in swiotlb?
FUJITA Tomonori wrote:
> On Wed, 17 Dec 2008 08:31:43 -0800
> Jeremy Fitzhardinge <[email protected]> wrote:
>
>
>>> I think that the whole patchset is against the swiotlb design. swiotlb
>>> is designed to be used as a library. Each architecture implements the
>>> own swiotlb by using swiotlb library
>>> (e.g. arch/x86/kernel/pci-swiotlb_64.c).
>>>
>>>
>> The whole patchset? The bulk of the changes to lib/swiotlb.c are
>> relatively minor to remove the unwarranted assumptions it is making in
>> the face of a new user. They will have no effect on other existing
>> users, including non-Xen x86 builds.
>>
>> If you have specific objections we can discuss those, but I don't think
>> there's anything fundamentally wrong with making lib/swiotlb.c a bit
>> more generically useful.
>>
>
> Sorry, but the highmem support is not generically useful.
>
That's a circular argument. lib/swiotlb is currently used by 1 1/2 of the
23 architectures, neither of which happens to use highmem. If you
consider swiotlb to be a general-purpose mechanism, then presumably the
other 21 1/2 architectures are at least potential users (and 6 1/2 of
those have highmem configurations). If you base your judgement of
what's a "generically useful" change on what the current users
need, then you'll naturally exclude the requirements of all the other
(potential) users.
And the matter arises now because we're trying to unify the use of
swiotlb in x86, bringing the number of users up to 2.
> I'm especially against the highmem support. As you said, the rest
> looks fine but if you go with pci-swiotlb_32.c, I think that you don't
> need the most of them.
>
I really don't want to have to duplicate a lot of code just to
incorporate a few small changes. In fact the original Xen patch set
included its own swiotlb implementation, and that was rejected on the
grounds that we should use the common swiotlb.c.
This patch set abstracts out a few more architecture-specific details,
to allow the common swiotlb logic to be in one place. By not putting
highmem support into lib/swiotlb.c, you're effectively saying that we
should have separate lib/swiotlb-nohighmem.c and lib/swiotlb-highmem.c
files, with almost identical contents aside from the few places where we
need to use page+offset rather than a kernel vaddr.
Yes, highmem is ugly, and there'll be cheering in the streets when we
can finally kill the last highmem user. But for now we need to deal
with it, and unfortunately swiotlb is now one of the places that it
affects. It's true that adding highmem support to swiotlb.c increases
the maintenance burden to some extent. But I can't see an alternative
that doesn't increase the burden a lot more.
We can look at abstracting things a bit more so that non-highmem
architectures can keep using simple addresses rather than page+offset,
but that highmem code has got to be *somewhere*.
>> > have zero effect on the code generated for IA64 or x86-64. This is not
>> > really a Xen-specific change, but a result of adding swiotlb support for
>> > i386. Other architectures which support a notion of highmem would also
>> > need this code if they wanted to use swiotlb.
>>
>
> Can you be more specific? What architecture is plan to use highmem
> support in swiotlb?
>
Well, concretely, x86-32. I don't know about the others, but I don't
see an inherent reason why they wouldn't have a problem that
swiotlb solves.
J
* Jeremy Fitzhardinge <[email protected]> wrote:
> FUJITA Tomonori wrote:
>> On Wed, 17 Dec 2008 08:31:43 -0800
>> Jeremy Fitzhardinge <[email protected]> wrote:
>>
>>
>>>> I think that the whole patchset is against the swiotlb design. swiotlb
>>>> is designed to be used as a library. Each architecture implements the
>>>> own swiotlb by using swiotlb library
>>>> (e.g. arch/x86/kernel/pci-swiotlb_64.c).
>>>>
>>> The whole patchset? The bulk of the changes to lib/swiotlb.c are
>>> relatively minor to remove the unwarranted assumptions it is making
>>> in the face of a new user. They will have no effect on other
>>> existing users, including non-Xen x86 builds.
>>>
>>> If you have specific objections we can discuss those, but I don't
>>> think there's anything fundamentally wrong with making lib/swiotlb.c
>>> a bit more generically useful.
>>>
>>
>> Sorry, but the highmem support is not generically useful.
>>
>
> That's a circular argument. lib/swiotlb currently used by 1 1/2 of the
> 23 architectures, neither of which happens to use highmem. If you
> consider swiotlb to be a general purpose mechanism, then presumably the
> other 21 1/2 architectures are at least potential users (and 6 1/2 of
> those have highmem configurations). If you base your judgement of
> what's a "generically useful" change based on what the current users
> need, then you'll naturally exclude the requirements of all the other
> (potential) users.
>
> And the matter arises now because we're trying to unify the use of
> swiotlb in x86, bringing the number of users up to 2.
>
>> I'm especially against the highmem support. As you said, the rest looks
>> fine but if you go with pci-swiotlb_32.c, I think that you don't need
>> the most of them.
>>
>
> I really don't want to have to duplicate a lot of code just to
> incorporate a few small changes. In fact the original Xen patch set
> included its own swiotlb implementation, and that was rejected on the
> grounds that we should use the common swiotlb.c.
duplicating that would not be a very good design - and 32-bit highmem is a
reality we have to live with for some time to come. The impact:
10 files changed, 251 insertions(+), 81 deletions(-)
looks rather to the point and seems relatively compressed. In fact 32-bit
Xen could end up being the largest user (and tester) of swiotlb facilities
in general, as modern 64-bit platforms tend to have hw IOMMUs. Having more
code sharing and more testers is a plus.
Ingo
On Thu, 18 Dec 2008 14:23:13 +0100
Ingo Molnar <[email protected]> wrote:
>
> * Jeremy Fitzhardinge <[email protected]> wrote:
>
> > FUJITA Tomonori wrote:
> >> On Wed, 17 Dec 2008 08:31:43 -0800
> >> Jeremy Fitzhardinge <[email protected]> wrote:
> >>
> >>
> >>>> I think that the whole patchset is against the swiotlb design. swiotlb
> >>>> is designed to be used as a library. Each architecture implements the
> >>>> own swiotlb by using swiotlb library
> >>>> (e.g. arch/x86/kernel/pci-swiotlb_64.c).
> >>>>
> >>> The whole patchset? The bulk of the changes to lib/swiotlb.c are
> >>> relatively minor to remove the unwarranted assumptions it is making
> >>> in the face of a new user. They will have no effect on other
> >>> existing users, including non-Xen x86 builds.
> >>>
> >>> If you have specific objections we can discuss those, but I don't
> >>> think there's anything fundamentally wrong with making lib/swiotlb.c
> >>> a bit more generically useful.
> >>>
> >>
> >> Sorry, but the highmem support is not generically useful.
> >>
> >
> > That's a circular argument. lib/swiotlb currently used by 1 1/2 of the
> > 23 architectures, neither of which happens to use highmem. If you
> > consider swiotlb to be a general purpose mechanism, then presumably the
> > other 21 1/2 architectures are at least potential users (and 6 1/2 of
> > those have highmem configurations). If you base your judgement of
> > what's a "generically useful" change based on what the current users
> > need, then you'll naturally exclude the requirements of all the other
> > (potential) users.
> >
> > And the matter arises now because we're trying to unify the use of
> > swiotlb in x86, bringing the number of users up to 2.
> >
> >> I'm especially against the highmem support. As you said, the rest looks
> >> fine but if you go with pci-swiotlb_32.c, I think that you don't need
> >> the most of them.
> >>
> >
> > I really don't want to have to duplicate a lot of code just to
> > incorporate a few small changes. In fact the original Xen patch set
> > included its own swiotlb implementation, and that was rejected on the
> > grounds that we should use the common swiotlb.c.
>
> duplicating that would not be a very good design - and 32-bit highmem is a
> reality we have to live with for some time to come. The impact:
>
> 10 files changed, 251 insertions(+), 81 deletions(-)
It's not about the number of lines. For example, X86_64 and IA64
have to use the following ugly code:
+static struct swiotlb_phys_addr swiotlb_bus_to_phys_addr(char *dma_addr)
+{
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ struct swiotlb_phys_addr buffer = io_tlb_orig_addr[index];
+ buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
+ buffer.page += buffer.offset >> PAGE_SHIFT;
+ buffer.offset &= PAGE_SIZE - 1;
+ return buffer;
+}
Why should they suffer such code when they don't need it at all?
> looks rather to the point and seems relatively compressed. In fact 32-bit
> Xen could end up being the largest user (and tester) of swiotlb facilities
> in general, as modern 64-bit platforms tend to have hw IOMMUs.
I don't think so. Yes, modern 64-bit platforms tend to have hw IOMMUs,
but I think that the majority don't use them. Modern I/O devices
(such as HBAs) are capable of 64-bit address DMA. So if you don't use
virtualization, you don't want the IOMMU overheads (software and
hardware). So I think that 32-bit Xen is a fringe user of swiotlb.
> Having more code sharing and more testers is a plus.
If you read the patch (the above code, for example), it unnecessarily adds
huge complexity to X86_64 and IA64. It just hurts stability,
readability, and maintainability. There is no plus.
If multiple architectures need highmem in swiotlb, adding it to
lib/swiotlb.c makes sense. But what the Xen people insist is that we
should be prepared for unlikely events, such as 'x86_64 might want to
support highmem'; they have no idea who would use it, but we should be
prepared for someone who could.
I believe that adding ugly complexity to the generic code for unlikely
events is a bad idea.
I don't insist that Xen people need to duplicate all the swiotlb
code. It's fine to make it usable for x86. But I'm against adding the
code that only x86 uses to lib/swiotlb.c. That should be in
arch/x86/pci-swiotlb_32.c. We already have
arch/x86/pci-swiotlb_64.c. If you add the code that only x86_32 uses to
the generic library code (lib/swiotlb.c), why do we have
arch/x86/pci-swiotlb_64.c?
I know that you will merge the patchset anyway but I'm strongly
against the highmem patch.
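To make the split being argued about concrete, here is a rough illustration
(an assumption, not code from any posted patch) of what an arch-side file
such as pci-swiotlb_32.c, or a Xen variant of it, could look like:
lib/swiotlb.c stays the generic library, and the arch file overrides only
the __weak hooks it cares about. The hook signatures are the ones from this
series; the placeholder bodies and the file placement are assumptions.

#include <linux/swiotlb.h>

/* Override the phys<->bus translation (identity here; Xen's would not be) */
dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
{
	return paddr;
}

phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
{
	return baddr;
}

/* Decide per range whether bouncing is needed even below the DMA mask */
int swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
{
	/* a Xen version would check that the range is machine-contiguous */
	return 0;
}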
On Dec 17, 2008, at 10:56 AM, FUJITA Tomonori wrote:
> On Wed, 17 Dec 2008 08:31:43 -0800
> Jeremy Fitzhardinge <[email protected]> wrote:
>
>>> I think that the whole patchset is against the swiotlb design.
>>> swiotlb
>>> is designed to be used as a library. Each architecture implements
>>> the
>>> own swiotlb by using swiotlb library
>>> (e.g. arch/x86/kernel/pci-swiotlb_64.c).
>>>
>>
>> The whole patchset? The bulk of the changes to lib/swiotlb.c are
>> relatively minor to remove the unwarranted assumptions it is making
>> in
>> the face of a new user. They will have no effect on other existing
>> users, including non-Xen x86 builds.
>>
>> If you have specific objections we can discuss those, but I don't
>> think
>> there's anything fundamentally wrong with making lib/swiotlb.c a bit
>> more generically useful.
>
> Sorry, but the highmem support is not generically useful.
>
> I'm especially against the highmem support. As you said, the rest
> looks fine but if you go with pci-swiotlb_32.c, I think that you don't
> need the most of them.
>
>
>
>>> For example, adding the following code (9/14) for just Xen that the
>>> majority of swiotbl users (x86_64 and IA64) don't need to the
>>> library
>>> is against the design.
>>>
>>
>> If the architecture doesn't support highmem then this code will
>> compile
>> to nothing - PageHighMem() will always evaluate to 0. It will
>> therefore
>
> I'm not talking about it will be complied or not. As I wrote in
> another mail, I'm talking about the maintainability and readability of
> the common library.
>
>
>> have zero effect on the code generated for IA64 or x86-64. This is
>> not
>> really a Xen-specific change, but a result of adding swiotlb
>> support for
>> i386. Other architectures which support a notion of highmem would
>> also
>> need this code if they wanted to use swiotlb.
>
> Can you be more specific? What architecture is plan to use highmem
> support in swiotlb?
32-bit powerpc needs this support - I was actually about to push a
similar set of patches. We have several processors that support 36
bits of physical address space and do not have any iommu capability.
The rest of the kernel support for those processors is now in place,
so swiotlb is the last piece of the puzzle for that to be fully
functional. I need to take a closer look at this series to see
exactly what it's doing and how it differs from what I've been testing.
So there is another immediate use case, and I'd really hate to see
this code duplicated. It should be entirely possible to remove the
assumption that we can save off the VA of the original buffer, which
is the thing that precludes HIGHMEM support, and still have nice
readable, maintainable code.
Cheers,
-Becky
Becky Bruce wrote:
> 32-bit powerpc needs this support - I was actually about to push a
> similar set of patches. We have several processors that support 36
> bits of physical address space and do not have any iommu capability.
> The rest of the kernel support for those processors is now in place,
> so swiotlb is the last piece of the puzzle for that to be fully
> functional. I need to take a closer look at this series to see
> exactly what it's doing and how it differs from what I've been testing.
>
> So there is another immediate use case, and I'd really hate to see
> this code duplicated. It should be entirely possible to remove the
> assumption that we can save off the VA of the original buffer, which
> is the thing that precludes HIGHMEM support, and still have nice
> readable, maintainable code.
Good, I'm glad I'm not just making things up ;)
The gist of the patch is to convert all the kernel virtual addresses
into struct page+offset. It's a fairly generic way to handle highmem
systems, so it should work for you too. Of course, if you have a
cleaner patch to achieve the same result, then we can evaluate that too.
J
* Becky Bruce <[email protected]> wrote:
>> Can you be more specific? What architecture is plan to use highmem
>> support in swiotlb?
>
> 32-bit powerpc needs this support - I was actually about to push a
> similar set of patches. We have several processors that support 36 bits
> of physical address space and do not have any iommu capability. The
> rest of the kernel support for those processors is now in place, so
> swiotlb is the last piece of the puzzle for that to be fully functional.
> I need to take a closer look at this series to see exactly what it's
> doing and how it differs from what I've been testing.
>
> So there is another immediate use case, and I'd really hate to see this
> code duplicated. It should be entirely possible to remove the
> assumption that we can save off the VA of the original buffer, which is
> the thing that precludes HIGHMEM support, and still have nice readable,
> maintainable code.
you can have a look at Jeremy's current lineup of patches in
tip/core/iommu:
http://people.redhat.com/mingo/tip.git/README
(also integrated into tip/master)
What would be needed to make it suit your use case too?
Ingo