While working on 10Gb/s routing performance I found a significant amount of
time was being spent in the swiotlb DMA handler. Further digging found that a
significant amount of this was due to virtual to physical address translation
and calling the function that did it. It accounted for nearly 60% of the
total swiotlb overhead.
This patch set works to resolve that by replacing the io_tlb_start virtual
address with io_tlb_addr which is a physical address. In addition it changes
the io_tlb_overflow_buffer from a virtual to a physical address. I followed
through with the cleanup to the point that the only functions that really
require the virtual address for the DMA buffer are the init, free, and
bounce functions.
In the case of devices that are using the bounce buffers these patches should
result in only a slight performance gain if any. This is due to the locking
overhead required to map and unmap the buffers.
In the case of devices that are not making use of bounce buffers these patches
can significantly reduce their overhead. In the case of an ixgbe routing test
for example, these changes result in 7 fewer calls to __phys_addr and
allow is_swiotlb_buffer to become inlined due to a reduction in the number of
instructions. When running a routing throughput test using small packets I
saw roughly a 5% increase in packets rates after applying these patches. This
appears to match up with the CPU overhead reduction I was tracking via perf.
Before:
Results 10.29Mps
# Overhead Symbol
# ........ ...........................................................................................................
#
1.97% [k] __phys_addr
|
|--24.97%-- swiotlb_sync_single
|
|--16.55%-- is_swiotlb_buffer
|
|--11.25%-- unmap_single
|
--2.71%-- swiotlb_dma_mapping_error
1.66% [k] swiotlb_sync_single
1.45% [k] is_swiotlb_buffer
0.53% [k] unmap_single
0.52% [k] swiotlb_map_page
0.47% [k] swiotlb_sync_single_for_device
0.43% [k] swiotlb_sync_single_for_cpu
0.42% [k] swiotlb_dma_mapping_error
0.34% [k] swiotlb_unmap_page
After:
Results 10.99Mps
# Overhead Symbol
# ........ ...........................................................................................................
#
0.50% [k] swiotlb_map_page
0.50% [k] swiotlb_sync_single
0.36% [k] swiotlb_sync_single_for_cpu
0.35% [k] swiotlb_sync_single_for_device
0.25% [k] swiotlb_unmap_page
0.17% [k] swiotlb_dma_mapping_error
Finally, I updated the parameter names for several of the core function calls
as there was some ambiguity in naming. Specifically virtual address pointers
were named dma_addr. When I changed these pointers to physical I instead used
the name tlb_addr as this value represented a physical address in the
io_tlb_addr region and is less likely to be confused with a bus address.
---
Alexander Duyck (7):
swiotlb: Do not export swiotlb_bounce since there are no external consumers
swiotlb: Use physical addresses instead of virtual in swiotlb_tbl_sync_single
swiotlb: Use physical addresses for swiotlb_tbl_unmap_single
swiotlb: Return physical addresses when calling swiotlb_tbl_map_single
swiotlb: Make io_tlb_overflow_buffer a physical address
swiotlb: Replace virtual io_tlb_start with physical io_tlb_addr
swiotlb: Instead of tracking the end of the swiotlb region just calculate it
drivers/xen/swiotlb-xen.c | 25 ++--
include/linux/swiotlb.h | 20 ++-
lib/swiotlb.c | 285 +++++++++++++++++++++++----------------------
3 files changed, 170 insertions(+), 160 deletions(-)
--
In the case of swiotlb we already have the start of the region and the number
of slabs that give us the region size. Instead of having to call
virt_to_phys on two pointers we can just take advantage of the fact that the
region is linear and just compute the end based on the start plus the size.
Signed-off-by: Alexander Duyck <[email protected]>
---
lib/swiotlb.c | 25 ++++++++++++-------------
1 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f114bf6..5cc4d4e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -57,11 +57,11 @@ int swiotlb_force;
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
-static char *io_tlb_start, *io_tlb_end;
+static char *io_tlb_start;
/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
+ * The number of IO TLB blocks (in groups of 64).
+ * This is command line adjustable via setup_io_tlb_npages.
*/
static unsigned long io_tlb_nslabs;
@@ -128,11 +128,11 @@ void swiotlb_print_info(void)
phys_addr_t pstart, pend;
pstart = virt_to_phys(io_tlb_start);
- pend = virt_to_phys(io_tlb_end);
+ pend = pstart + bytes;
printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
(unsigned long long)pstart, (unsigned long long)pend - 1,
- bytes >> 20, io_tlb_start, io_tlb_end - 1);
+ bytes >> 20, io_tlb_start, io_tlb_start + bytes - 1);
}
void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -143,12 +143,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_nslabs = nslabs;
io_tlb_start = tlb;
- io_tlb_end = io_tlb_start + bytes;
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
*/
io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
for (i = 0; i < io_tlb_nslabs; i++)
@@ -254,14 +252,12 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_nslabs = nslabs;
io_tlb_start = tlb;
- io_tlb_end = io_tlb_start + bytes;
memset(io_tlb_start, 0, bytes);
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
*/
io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
get_order(io_tlb_nslabs * sizeof(int)));
@@ -304,7 +300,6 @@ cleanup3:
sizeof(int)));
io_tlb_list = NULL;
cleanup2:
- io_tlb_end = NULL;
io_tlb_start = NULL;
io_tlb_nslabs = 0;
return -ENOMEM;
@@ -339,8 +334,10 @@ void __init swiotlb_free(void)
static int is_swiotlb_buffer(phys_addr_t paddr)
{
- return paddr >= virt_to_phys(io_tlb_start) &&
- paddr < virt_to_phys(io_tlb_end);
+ phys_addr_t swiotlb_start = virt_to_phys(io_tlb_start);
+
+ return paddr >= swiotlb_start &&
+ paddr < (swiotlb_start + (io_tlb_nslabs << IO_TLB_SHIFT));
}
/*
@@ -938,6 +935,8 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error);
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ return swiotlb_virt_to_bus(hwdev, io_tlb_start + bytes - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
This change replaces all references to the virtual address for io_tlb_start
with references to the physical address io_tlb_addr. The main advantage of
replacing the virtual address with a physical address is that we can avoid
having to do multiple translations from the virtual address to the physical
one needed for testing an existing DMA address.
Signed-off-by: Alexander Duyck <[email protected]>
---
lib/swiotlb.c | 67 +++++++++++++++++++++++++++++----------------------------
1 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5cc4d4e..3c45f10 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -53,11 +53,11 @@
int swiotlb_force;
/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
+ * Physical address for swiotlb bounce buffer. Used to do a quick range
+ * check in swiotlb_tbl_unmap_single and swiotlb_tbl_sync_single_*, to see
+ * if the memory was in fact allocated by this API.
*/
-static char *io_tlb_start;
+phys_addr_t io_tlb_addr;
/*
* The number of IO TLB blocks (in groups of 64).
@@ -125,14 +125,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
void swiotlb_print_info(void)
{
unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
- phys_addr_t pstart, pend;
+ unsigned char *vstart, *vend;
- pstart = virt_to_phys(io_tlb_start);
- pend = pstart + bytes;
+ vstart = phys_to_virt(io_tlb_addr);
+ vend = vstart + bytes;
printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
- (unsigned long long)pstart, (unsigned long long)pend - 1,
- bytes >> 20, io_tlb_start, io_tlb_start + bytes - 1);
+ (unsigned long long)io_tlb_addr,
+ (unsigned long long)io_tlb_addr + bytes - 1,
+ bytes >> 20, vstart, vend - 1);
}
void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -142,7 +143,7 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_nslabs = nslabs;
- io_tlb_start = tlb;
+ io_tlb_addr = __pa(tlb);
/*
* Allocate and initialize the free list array. This array is used
@@ -171,6 +172,7 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
static void __init
swiotlb_init_with_default_size(size_t default_size, int verbose)
{
+ unsigned char *vstart;
unsigned long bytes;
if (!io_tlb_nslabs) {
@@ -183,11 +185,11 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
- if (!io_tlb_start)
+ vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
+ if (!vstart)
panic("Cannot allocate SWIOTLB buffer");
- swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+ swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose);
}
void __init
@@ -205,6 +207,7 @@ int
swiotlb_late_init_with_default_size(size_t default_size)
{
unsigned long bytes, req_nslabs = io_tlb_nslabs;
+ unsigned char *vstart = NULL;
unsigned int order;
int rc = 0;
@@ -221,14 +224,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
- order);
- if (io_tlb_start)
+ vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+ order);
+ if (vstart)
break;
order--;
}
- if (!io_tlb_start) {
+ if (!vstart) {
io_tlb_nslabs = req_nslabs;
return -ENOMEM;
}
@@ -237,9 +240,9 @@ swiotlb_late_init_with_default_size(size_t default_size)
"for software IO TLB\n", (PAGE_SIZE << order) >> 20);
io_tlb_nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs);
+ rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
if (rc)
- free_pages((unsigned long)io_tlb_start, order);
+ free_pages((unsigned long)vstart, order);
return rc;
}
@@ -251,9 +254,9 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_nslabs = nslabs;
- io_tlb_start = tlb;
+ io_tlb_addr = virt_to_phys(tlb);
- memset(io_tlb_start, 0, bytes);
+ memset(tlb, 0, bytes);
/*
* Allocate and initialize the free list array. This array is used
@@ -300,7 +303,7 @@ cleanup3:
sizeof(int)));
io_tlb_list = NULL;
cleanup2:
- io_tlb_start = NULL;
+ io_tlb_addr = 0;
io_tlb_nslabs = 0;
return -ENOMEM;
}
@@ -317,7 +320,7 @@ void __init swiotlb_free(void)
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
- free_pages((unsigned long)io_tlb_start,
+ free_pages((unsigned long)phys_to_virt(io_tlb_addr),
get_order(io_tlb_nslabs << IO_TLB_SHIFT));
} else {
free_bootmem_late(__pa(io_tlb_overflow_buffer),
@@ -326,7 +329,7 @@ void __init swiotlb_free(void)
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
free_bootmem_late(__pa(io_tlb_list),
PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
- free_bootmem_late(__pa(io_tlb_start),
+ free_bootmem_late(io_tlb_addr,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
io_tlb_nslabs = 0;
@@ -334,10 +337,8 @@ void __init swiotlb_free(void)
static int is_swiotlb_buffer(phys_addr_t paddr)
{
- phys_addr_t swiotlb_start = virt_to_phys(io_tlb_start);
-
- return paddr >= swiotlb_start &&
- paddr < (swiotlb_start + (io_tlb_nslabs << IO_TLB_SHIFT));
+ return paddr >= io_tlb_addr &&
+ paddr < (io_tlb_addr + (io_tlb_nslabs << IO_TLB_SHIFT));
}
/*
@@ -450,7 +451,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
io_tlb_list[i] = 0;
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
io_tlb_list[i] = ++count;
- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+ dma_addr = (char *)phys_to_virt(io_tlb_addr) + (index << IO_TLB_SHIFT);
/*
* Update the indices to avoid searching in the next
@@ -494,7 +495,7 @@ static void *
map_single(struct device *hwdev, phys_addr_t phys, size_t size,
enum dma_data_direction dir)
{
- dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+ dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_addr);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
}
@@ -508,7 +509,7 @@ swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ int index = (dma_addr - (char *)phys_to_virt(io_tlb_addr)) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
/*
@@ -549,7 +550,7 @@ swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
enum dma_data_direction dir,
enum dma_sync_target target)
{
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ int index = (dma_addr - (char *)phys_to_virt(io_tlb_addr)) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
@@ -937,6 +938,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
- return swiotlb_virt_to_bus(hwdev, io_tlb_start + bytes - 1) <= mask;
+ return phys_to_dma(hwdev, io_tlb_addr + bytes - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
This change makes it so that we can avoid virt_to_phys overhead when using the
io_tlb_overflow_buffer. My original plan was to completely remove the value
and replace it with a constant but I had seen that there were recent patches
that stated this couldn't be done until all device drivers that depended on
that functionality be updated.
Signed-off-by: Alexander Duyck <[email protected]>
---
lib/swiotlb.c | 61 ++++++++++++++++++++++++++++++++-------------------------
1 files changed, 34 insertions(+), 27 deletions(-)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3c45f10..bbf36d1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -70,7 +70,7 @@ static unsigned long io_tlb_nslabs;
*/
static unsigned long io_tlb_overflow = 32*1024;
-static void *io_tlb_overflow_buffer;
+phys_addr_t io_tlb_overflow_buffer;
/*
* This is a free list describing the number of free entries available from
@@ -138,6 +138,7 @@ void swiotlb_print_info(void)
void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
+ void *v_overflow_buffer;
unsigned long i, bytes;
bytes = nslabs << IO_TLB_SHIFT;
@@ -146,6 +147,15 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_addr = __pa(tlb);
/*
+ * Get the overflow emergency buffer
+ */
+ v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
+ if (!v_overflow_buffer)
+ panic("Cannot allocate SWIOTLB overflow buffer!\n");
+
+ io_tlb_overflow_buffer = __pa(v_overflow_buffer);
+
+ /*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
*/
@@ -155,12 +165,6 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_index = 0;
io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
- if (!io_tlb_overflow_buffer)
- panic("Cannot allocate SWIOTLB overflow buffer!\n");
if (verbose)
swiotlb_print_info();
}
@@ -250,6 +254,7 @@ int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
unsigned long i, bytes;
+ unsigned char *v_overflow_buffer;
bytes = nslabs << IO_TLB_SHIFT;
@@ -259,13 +264,23 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
memset(tlb, 0, bytes);
/*
+ * Get the overflow emergency buffer
+ */
+ v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+ get_order(io_tlb_overflow));
+ if (!v_overflow_buffer)
+ goto cleanup2;
+
+ io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
+
+ /*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
*/
io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
get_order(io_tlb_nslabs * sizeof(int)));
if (!io_tlb_list)
- goto cleanup2;
+ goto cleanup3;
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
@@ -276,18 +291,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
get_order(io_tlb_nslabs *
sizeof(phys_addr_t)));
if (!io_tlb_orig_addr)
- goto cleanup3;
+ goto cleanup4;
memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
- /*
- * Get the overflow emergency buffer
- */
- io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
- get_order(io_tlb_overflow));
- if (!io_tlb_overflow_buffer)
- goto cleanup4;
-
swiotlb_print_info();
late_alloc = 1;
@@ -295,13 +302,13 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return 0;
cleanup4:
- free_pages((unsigned long)io_tlb_orig_addr,
- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
- io_tlb_orig_addr = NULL;
-cleanup3:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
io_tlb_list = NULL;
+cleanup3:
+ free_pages((unsigned long)v_overflow_buffer,
+ get_order(io_tlb_overflow));
+ io_tlb_overflow_buffer = 0;
cleanup2:
io_tlb_addr = 0;
io_tlb_nslabs = 0;
@@ -310,11 +317,11 @@ cleanup2:
void __init swiotlb_free(void)
{
- if (!io_tlb_overflow_buffer)
+ if (!io_tlb_orig_addr)
return;
if (late_alloc) {
- free_pages((unsigned long)io_tlb_overflow_buffer,
+ free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
get_order(io_tlb_overflow));
free_pages((unsigned long)io_tlb_orig_addr,
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
@@ -323,7 +330,7 @@ void __init swiotlb_free(void)
free_pages((unsigned long)phys_to_virt(io_tlb_addr),
get_order(io_tlb_nslabs << IO_TLB_SHIFT));
} else {
- free_bootmem_late(__pa(io_tlb_overflow_buffer),
+ free_bootmem_late(io_tlb_overflow_buffer,
PAGE_ALIGN(io_tlb_overflow));
free_bootmem_late(__pa(io_tlb_orig_addr),
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
@@ -694,7 +701,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
map = map_single(dev, phys, size, dir);
if (!map) {
swiotlb_full(dev, size, dir, 1);
- map = io_tlb_overflow_buffer;
+ return phys_to_dma(dev, io_tlb_overflow_buffer);
}
dev_addr = swiotlb_virt_to_bus(dev, map);
@@ -704,7 +711,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
*/
if (!dma_capable(dev, dev_addr, size)) {
swiotlb_tbl_unmap_single(dev, map, size, dir);
- dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+ return phys_to_dma(dev, io_tlb_overflow_buffer);
}
return dev_addr;
@@ -923,7 +930,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
- return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
+ return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
This change makes it so that the sync functionality also uses physical
addresses. This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.
In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_sync_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr. This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.
Signed-off-by: Alexander Duyck <[email protected]>
---
drivers/xen/swiotlb-xen.c | 3 +--
include/linux/swiotlb.h | 3 ++-
lib/swiotlb.c | 22 +++++++++++-----------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4cedc28..af47e75 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -433,8 +433,7 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr)) {
- swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
- target);
+ swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
return;
}
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 291643c..e0ac98f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -47,7 +47,8 @@ extern void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir);
-extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_sync_single(struct device *hwdev,
+ phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f37050f..d6d1ddc 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -553,26 +553,27 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-void
-swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
- enum dma_data_direction dir,
- enum dma_sync_target target)
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
- int index = (dma_addr - (char *)phys_to_virt(io_tlb_addr)) >> IO_TLB_SHIFT;
- phys_addr_t phys = io_tlb_orig_addr[index];
+ int index = (tlb_addr - io_tlb_addr) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
- phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+ orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+ size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+ size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
break;
@@ -781,8 +782,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
- target);
+ swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
return;
}
Currently swiotlb is the only consumer for swiotlb_bounce. Since that is the
case it doesn't make much sense to be exporting it so make it a static
function only.
In addition we can save a few more lines of code by making it so that it
accepts the DMA address as a physical address instead of a virtual one. This
is the last piece in essentially pushing all of the DMA address values to use
physical addresses in swiotlb.
In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_bounce I am renaming phys to orig_addr, and dma_addr to
tlb_addr. This way is should be clear that orig_addr is contained within
io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer.
Signed-off-by: Alexander Duyck <[email protected]>
---
include/linux/swiotlb.h | 3 ---
lib/swiotlb.c | 35 ++++++++++++++++-------------------
2 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e0ac98f..071d62c 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -53,9 +53,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
enum dma_sync_target target);
/* Accessory functions. */
-extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir);
-
extern void
*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d6d1ddc..c1c5f19 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -351,14 +351,15 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
/*
* Bounce: copy the swiotlb buffer back to the original dma location
*/
-void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
{
- unsigned long pfn = PFN_DOWN(phys);
+ unsigned long pfn = PFN_DOWN(orig_addr);
+ unsigned char *vaddr = phys_to_virt(tlb_addr);
if (PageHighMem(pfn_to_page(pfn))) {
/* The buffer does not have a mapping. Map it in and copy */
- unsigned int offset = phys & ~PAGE_MASK;
+ unsigned int offset = orig_addr & ~PAGE_MASK;
char *buffer;
unsigned int sz = 0;
unsigned long flags;
@@ -369,25 +370,23 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
local_irq_save(flags);
buffer = kmap_atomic(pfn_to_page(pfn));
if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, buffer + offset, sz);
+ memcpy(vaddr, buffer + offset, sz);
else
- memcpy(buffer + offset, dma_addr, sz);
+ memcpy(buffer + offset, vaddr, sz);
kunmap_atomic(buffer);
local_irq_restore(flags);
size -= sz;
pfn++;
- dma_addr += sz;
+ vaddr += sz;
offset = 0;
}
+ } else if (dir == DMA_TO_DEVICE) {
+ memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
- if (dir == DMA_TO_DEVICE)
- memcpy(dma_addr, phys_to_virt(phys), size);
- else
- memcpy(phys_to_virt(phys), dma_addr, size);
+ memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
-EXPORT_SYMBOL_GPL(swiotlb_bounce);
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
dma_addr_t tbl_dma_addr,
@@ -489,8 +488,7 @@ found:
for (i = 0; i < nslots; i++)
io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
- DMA_TO_DEVICE);
+ swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
return tlb_addr;
}
@@ -522,9 +520,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
/*
* First, sync the memory before unmapping the entry
*/
- if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
- size, DMA_FROM_DEVICE);
+ if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
@@ -565,14 +562,14 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
switch (target) {
case SYNC_FOR_CPU:
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+ swiotlb_bounce(orig_addr, tlb_addr,
size, DMA_FROM_DEVICE);
else
BUG_ON(dir != DMA_TO_DEVICE);
break;
case SYNC_FOR_DEVICE:
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+ swiotlb_bounce(orig_addr, tlb_addr,
size, DMA_TO_DEVICE);
else
BUG_ON(dir != DMA_FROM_DEVICE);
This change makes it so that swiotlb_tbl_map_single will return a physical
address instead of a virtual address when called. The advantage to this once
again is that we are avoiding a number of virt_to_phys and phys_to_virt
translations by working with everything as a physical address.
One change I had to make in order to support using physical addresses is that
I could no longer trust 0 to be a invalid physical address on all platforms.
So instead I made it so that ~0 is returned on error. This should never be a
valid return value as it implies that only one byte would be available for
use.
In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_map_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr. This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.
Signed-off-by: Alexander Duyck <[email protected]>
---
drivers/xen/swiotlb-xen.c | 22 ++++++-------
include/linux/swiotlb.h | 11 +++++-
lib/swiotlb.c | 78 +++++++++++++++++++++++----------------------
3 files changed, 59 insertions(+), 52 deletions(-)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 58db6df..8a6035a 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -338,9 +338,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- phys_addr_t phys = page_to_phys(page) + offset;
+ phys_addr_t map, phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = xen_phys_to_bus(phys);
- void *map;
BUG_ON(dir == DMA_NONE);
/*
@@ -356,16 +355,16 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* Oh well, have to allocate and map a bounce buffer.
*/
map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
- if (!map)
+ if (map == SWIOTLB_MAP_ERROR)
return DMA_ERROR_CODE;
- dev_addr = xen_virt_to_bus(map);
+ dev_addr = xen_phys_to_bus(map);
/*
* Ensure that the address returned is DMA'ble
*/
if (!dma_capable(dev, dev_addr, size)) {
- swiotlb_tbl_unmap_single(dev, map, size, dir);
+ swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
dev_addr = 0;
}
return dev_addr;
@@ -494,11 +493,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
if (swiotlb_force ||
!dma_capable(hwdev, dev_addr, sg->length) ||
range_straddles_page_boundary(paddr, sg->length)) {
- void *map = swiotlb_tbl_map_single(hwdev,
- start_dma_addr,
- sg_phys(sg),
- sg->length, dir);
- if (!map) {
+ phys_addr_t map = swiotlb_tbl_map_single(hwdev,
+ start_dma_addr,
+ sg_phys(sg),
+ sg->length,
+ dir);
+ if (map == SWIOTLB_MAP_ERROR) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
@@ -506,7 +506,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
sgl[0].dma_length = 0;
return DMA_ERROR_CODE;
}
- sg->dma_address = xen_virt_to_bus(map);
+ sg->dma_address = xen_phys_to_bus(map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 8d08b3e..1995f3e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,9 +34,14 @@ enum dma_sync_target {
SYNC_FOR_CPU = 0,
SYNC_FOR_DEVICE = 1,
};
-extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
- phys_addr_t phys, size_t size,
- enum dma_data_direction dir);
+
+/* define the last possible byte of physical address space as a mapping error */
+#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0)
+
+extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+ dma_addr_t tbl_dma_addr,
+ phys_addr_t phys, size_t size,
+ enum dma_data_direction dir);
extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
size_t size, enum dma_data_direction dir);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bbf36d1..98d5733 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -389,12 +389,13 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
}
EXPORT_SYMBOL_GPL(swiotlb_bounce);
-void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
- phys_addr_t phys, size_t size,
- enum dma_data_direction dir)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+ dma_addr_t tbl_dma_addr,
+ phys_addr_t orig_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
- char *dma_addr;
+ phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
int i;
unsigned long mask;
@@ -458,7 +459,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
io_tlb_list[i] = 0;
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
io_tlb_list[i] = ++count;
- dma_addr = (char *)phys_to_virt(io_tlb_addr) + (index << IO_TLB_SHIFT);
+ tlb_addr = io_tlb_addr + (index << IO_TLB_SHIFT);
/*
* Update the indices to avoid searching in the next
@@ -476,7 +477,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
not_found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
- return NULL;
+ return SWIOTLB_MAP_ERROR;
found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -486,11 +487,12 @@ found:
* needed.
*/
for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+ io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
+ DMA_TO_DEVICE);
- return dma_addr;
+ return tlb_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
@@ -498,9 +500,8 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
* Allocates bounce buffer and returns its kernel virtual address.
*/
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
- enum dma_data_direction dir)
+phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_addr);
@@ -594,12 +595,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_mask = hwdev->coherent_dma_mask;
ret = (void *)__get_free_pages(flags, order);
- if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
- /*
- * The allocated memory isn't reachable by the device.
- */
- free_pages((unsigned long) ret, order);
- ret = NULL;
+ if (ret) {
+ dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+ if (dev_addr + size - 1 > dma_mask) {
+ /*
+ * The allocated memory isn't reachable by the device.
+ */
+ free_pages((unsigned long) ret, order);
+ ret = NULL;
+ }
}
if (!ret) {
/*
@@ -607,13 +611,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
* GFP_DMA memory; fall back on map_single(), which
* will grab memory from the lowest available address range.
*/
- ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
- if (!ret)
+ phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+ if (paddr == SWIOTLB_MAP_ERROR)
return NULL;
- }
- memset(ret, 0, size);
- dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+ ret = phys_to_virt(paddr);
+ dev_addr = phys_to_dma(hwdev, paddr);
+ }
/* Confirm address can be DMA'd by device */
if (dev_addr + size - 1 > dma_mask) {
@@ -625,7 +629,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
return NULL;
}
+
*dma_handle = dev_addr;
+ memset(ret, 0, size);
+
return ret;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -682,9 +689,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- phys_addr_t phys = page_to_phys(page) + offset;
+ phys_addr_t map, phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = phys_to_dma(dev, phys);
- void *map;
BUG_ON(dir == DMA_NONE);
/*
@@ -695,22 +701,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
return dev_addr;
- /*
- * Oh well, have to allocate and map a bounce buffer.
- */
+ /* Oh well, have to allocate and map a bounce buffer. */
map = map_single(dev, phys, size, dir);
- if (!map) {
+ if (map == SWIOTLB_MAP_ERROR) {
swiotlb_full(dev, size, dir, 1);
return phys_to_dma(dev, io_tlb_overflow_buffer);
}
- dev_addr = swiotlb_virt_to_bus(dev, map);
+ dev_addr = phys_to_dma(dev, map);
- /*
- * Ensure that the address returned is DMA'ble
- */
+ /* Ensure that the address returned is DMA'ble */
if (!dma_capable(dev, dev_addr, size)) {
- swiotlb_tbl_unmap_single(dev, map, size, dir);
+ swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
return phys_to_dma(dev, io_tlb_overflow_buffer);
}
@@ -836,9 +838,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
if (swiotlb_force ||
!dma_capable(hwdev, dev_addr, sg->length)) {
- void *map = map_single(hwdev, sg_phys(sg),
- sg->length, dir);
- if (!map) {
+ phys_addr_t map = map_single(hwdev, sg_phys(sg),
+ sg->length, dir);
+ if (map == SWIOTLB_MAP_ERROR) {
/* Don't panic here, we expect map_sg users
to do proper error handling. */
swiotlb_full(hwdev, sg->length, dir, 0);
@@ -847,7 +849,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
sgl[0].dma_length = 0;
return 0;
}
- sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+ sg->dma_address = phys_to_dma(hwdev, map);
} else
sg->dma_address = dev_addr;
sg->dma_length = sg->length;
This change makes it so that the unmap functionality also uses physical
addresses. This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.
In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_unmap_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr. This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.
Signed-off-by: Alexander Duyck <[email protected]>
---
drivers/xen/swiotlb-xen.c | 4 ++--
include/linux/swiotlb.h | 3 ++-
lib/swiotlb.c | 37 +++++++++++++++++++------------------
3 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8a6035a..4cedc28 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -364,7 +364,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* Ensure that the address returned is DMA'ble
*/
if (!dma_capable(dev, dev_addr, size)) {
- swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
dev_addr = 0;
}
return dev_addr;
@@ -388,7 +388,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr)) {
- swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
return;
}
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 1995f3e..291643c 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -43,7 +43,8 @@ extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t phys, size_t size,
enum dma_data_direction dir);
-extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_unmap_single(struct device *hwdev,
+ phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir);
extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 98d5733..f37050f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -511,20 +511,20 @@ phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
/*
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
-void
-swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - (char *)phys_to_virt(io_tlb_addr)) >> IO_TLB_SHIFT;
- phys_addr_t phys = io_tlb_orig_addr[index];
+ int index = (tlb_addr - io_tlb_addr) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
/*
* First, sync the memory before unmapping the entry
*/
if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+ size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
@@ -617,17 +617,18 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
ret = phys_to_virt(paddr);
dev_addr = phys_to_dma(hwdev, paddr);
- }
- /* Confirm address can be DMA'd by device */
- if (dev_addr + size - 1 > dma_mask) {
- printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)dma_mask,
- (unsigned long long)dev_addr);
+ /* Confirm address can be DMA'd by device */
+ if (dev_addr + size - 1 > dma_mask) {
+ printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+ (unsigned long long)dma_mask,
+ (unsigned long long)dev_addr);
- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
- swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
- return NULL;
+ /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+ swiotlb_tbl_unmap_single(hwdev, paddr,
+ size, DMA_TO_DEVICE);
+ return NULL;
+ }
}
*dma_handle = dev_addr;
@@ -648,7 +649,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
else
/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
- swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);
@@ -712,7 +713,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
/* Ensure that the address returned is DMA'ble */
if (!dma_capable(dev, dev_addr, size)) {
- swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
return phys_to_dma(dev, io_tlb_overflow_buffer);
}
@@ -736,7 +737,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
return;
}