On systems where DMA addresses and physical addresses are not 1:1
(such as Xen PV guests), the generic dma_get_required_mask() will not
return the correct mask (since it uses max_pfn).
Some device drivers (such as mptsas, mpt2sas) use
dma_get_required_mask() to set the device's DMA mask to allow them to use
only 32-bit DMA addresses in hardware structures. This results in
unnecessary use of the SWIOTLB if DMA addresses are wider than 32 bits,
impacting performance significantly.
This series allows Xen PV guests to override the default
dma_get_required_mask() with a more suitable one.
Changes in v5:
- xen_swiotlb_get_required_mask() is x86 only.
Changes in v4:
- Assume 64-bit mask is required.
Changes in v3:
- fix off-by-one in xen_dma_get_required_mask()
- split ia64 changes into separate patch.
Changes in v2:
- split x86 and xen changes into separate patches
David
Use dma_ops->get_required_mask() if provided, defaulting to
dma_get_required_mask_from_max_pfn().
This is needed on systems (such as Xen PV guests) where the DMA
address and the physical address are not equal.
ARCH_HAS_DMA_GET_REQUIRED_MASK is defined in asm/device.h instead of
asm/dma-mapping.h because linux/dma-mapping.h uses the define before
including asm/dma-mapping.h
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Stefano Stabellini <[email protected]>
---
arch/x86/include/asm/device.h | 2 ++
arch/x86/kernel/pci-dma.c | 8 ++++++++
2 files changed, 10 insertions(+)
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 03dd729..10bc628 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -13,4 +13,6 @@ struct dev_archdata {
struct pdev_archdata {
};
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+
#endif /* _ASM_X86_DEVICE_H */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a25e202..5154400 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -140,6 +140,14 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
}
+u64 dma_get_required_mask(struct device *dev)
+{
+ if (dma_ops->get_required_mask)
+ return dma_ops->get_required_mask(dev);
+ return dma_get_required_mask_from_max_pfn(dev);
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* parameter documentation.
--
1.7.10.4
On a Xen PV guest the DMA addresses and physical addresses are not 1:1
and the generic dma_get_required_mask() does not return the correct mask
(since it uses max_pfn).
Some device drivers (such as mptsas, mpt2sas) use
dma_get_required_mask() to set the device's DMA mask to allow them to
use only 32-bit DMA addresses in hardware structures. This results in
unnecessary use of the SWIOTLB if DMA addresses are wider than 32 bits,
impacting performance significantly.
We could base the DMA mask on the maximum MFN but:
a) The hypercall op to get the maximum MFN (XENMEM_maximum_ram_page)
will truncate the result to an int in 32-bit guests.
b) Future uses of the IOMMU in Xen may map frames at bus addresses
above the end of RAM.
So, just assume a 64-bit DMA mask is always required.
Signed-off-by: David Vrabel <[email protected]>
---
arch/x86/xen/pci-swiotlb-xen.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 0e98e5d..35774f8 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -18,6 +18,11 @@
int xen_swiotlb __read_mostly;
+static u64 xen_swiotlb_get_required_mask(struct device *dev)
+{
+ return DMA_BIT_MASK(64);
+}
+
static struct dma_map_ops xen_swiotlb_dma_ops = {
.mapping_error = xen_swiotlb_dma_mapping_error,
.alloc = xen_swiotlb_alloc_coherent,
@@ -31,6 +36,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
.map_page = xen_swiotlb_map_page,
.unmap_page = xen_swiotlb_unmap_page,
.dma_supported = xen_swiotlb_dma_supported,
+ .get_required_mask = xen_swiotlb_get_required_mask,
};
/*
--
1.7.10.4
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Stefano Stabellini <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: [email protected]
---
arch/ia64/include/asm/machvec.h | 2 +-
arch/ia64/include/asm/machvec_init.h | 1 -
arch/ia64/pci/pci.c | 20 --------------------
3 files changed, 1 insertion(+), 22 deletions(-)
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 9c39bdf..beaa47d 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -287,7 +287,7 @@ extern struct dma_map_ops *dma_get_ops(struct device *);
# define platform_dma_get_ops dma_get_ops
#endif
#ifndef platform_dma_get_required_mask
-# define platform_dma_get_required_mask ia64_dma_get_required_mask
+# define platform_dma_get_required_mask dma_get_required_mask_from_max_pfn
#endif
#ifndef platform_irq_to_vector
# define platform_irq_to_vector __ia64_irq_to_vector
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
index 37a4698..ef964b2 100644
--- a/arch/ia64/include/asm/machvec_init.h
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -3,7 +3,6 @@
extern ia64_mv_send_ipi_t ia64_send_ipi;
extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
-extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask;
extern ia64_mv_irq_to_vector __ia64_irq_to_vector;
extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq;
extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem;
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 291a582..79da21b 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -791,26 +791,6 @@ static void __init set_pci_dfl_cacheline_size(void)
pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
}
-u64 ia64_dma_get_required_mask(struct device *dev)
-{
- u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
- u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
- u64 mask;
-
- if (!high_totalram) {
- /* convert to mask just covering totalram */
- low_totalram = (1 << (fls(low_totalram) - 1));
- low_totalram += low_totalram - 1;
- mask = low_totalram;
- } else {
- high_totalram = (1 << (fls(high_totalram) - 1));
- high_totalram += high_totalram - 1;
- mask = (((u64)high_totalram) << 32) + 0xffffffff;
- }
- return mask;
-}
-EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask);
-
u64 dma_get_required_mask(struct device *dev)
{
return platform_dma_get_required_mask(dev);
--
1.7.10.4
A generic dma_get_required_mask() is useful even for architectures (such
as ia64) that define ARCH_HAS_DMA_GET_REQUIRED_MASK.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Stefano Stabellini <[email protected]>
---
drivers/base/platform.c | 10 ++++++++--
include/linux/dma-mapping.h | 1 +
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index b2afc29..f9f3930 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1009,8 +1009,7 @@ int __init platform_bus_init(void)
return error;
}
-#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
-u64 dma_get_required_mask(struct device *dev)
+u64 dma_get_required_mask_from_max_pfn(struct device *dev)
{
u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
@@ -1028,6 +1027,13 @@ u64 dma_get_required_mask(struct device *dev)
}
return mask;
}
+EXPORT_SYMBOL_GPL(dma_get_required_mask_from_max_pfn);
+
+#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
+u64 dma_get_required_mask(struct device *dev)
+{
+ return dma_get_required_mask_from_max_pfn(dev);
+}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d5d3881..6e2fdfc 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -127,6 +127,7 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
return dma_set_mask_and_coherent(dev, mask);
}
+extern u64 dma_get_required_mask_from_max_pfn(struct device *dev);
extern u64 dma_get_required_mask(struct device *dev);
#ifndef set_arch_dma_coherent_ops
--
1.7.10.4
On Fri, Dec 05, 2014 at 02:07:59PM +0000, David Vrabel wrote:
> On systems where DMA addresses and physical addresses are not 1:1
> (such as Xen PV guests), the generic dma_get_required_mask() will not
> return the correct mask (since it uses max_pfn).
>
> Some device drivers (such as mptsas, mpt2sas) use
> dma_get_required_mask() to set the device's DMA mask to allow them to use
> only 32-bit DMA addresses in hardware structures. This results in
> unnecessary use of the SWIOTLB if DMA addresses are more than 32-bits,
> impacting performance significantly.
>
> This series allows Xen PV guests to override the default
> dma_get_required_mask() with a more suitable one.
>
> Changes in v5:
> - xen_swiotlb_get_required_mask() is x86 only.
>
> Changes in v4:
> - Assume 64-bit mask is required.
>
> Changes in v3:
> - fix off-by-one in xen_dma_get_required_mask()
> - split ia64 changes into separate patch.
>
> Changes in v2:
> - split x86 and xen changes into separate patches
>
> David
Why are you sending these to me? Am I the DMA maintainer and forgot
about it?
/me digs in MAINTAINERS...
Nope, not me! Patches are now deleted from my queue, go use
scripts/get_maintainer.pl like you should have done...
greg k-h
On Fri, Dec 05, 2014 at 02:08:00PM +0000, David Vrabel wrote:
> A generic dma_get_required_mask() is useful even for architectures (such
> as ia64) that define ARCH_HAS_GET_REQUIRED_MASK.
>
> Signed-off-by: David Vrabel <[email protected]>
> Reviewed-by: Stefano Stabellini <[email protected]>
> ---
> drivers/base/platform.c | 10 ++++++++--
Is this why you sent this to me? The x86 maintainers should handle this
patch set, not me for a tiny 8 lines in just one of the files, sorry.
greg k-h
On 05/12/14 21:31, Greg Kroah-Hartman wrote:
> On Fri, Dec 05, 2014 at 02:08:00PM +0000, David Vrabel wrote:
>> A generic dma_get_required_mask() is useful even for architectures (such
>> as ia64) that define ARCH_HAS_GET_REQUIRED_MASK.
>>
>> Signed-off-by: David Vrabel <[email protected]>
>> Reviewed-by: Stefano Stabellini <[email protected]>
>> ---
>> drivers/base/platform.c | 10 ++++++++--
>
> Is this why you sent this to me? The x86 maintainers should handle this
> patch set, not me for a tiny 8 lines in just one of the files, sorry.
This series will be merged via the Xen tree, but this patch still needs
your review or ack.
David
On Mon, Dec 08, 2014 at 10:36:14AM +0000, David Vrabel wrote:
> On 05/12/14 21:31, Greg Kroah-Hartman wrote:
> > On Fri, Dec 05, 2014 at 02:08:00PM +0000, David Vrabel wrote:
> >> A generic dma_get_required_mask() is useful even for architectures (such
> >> as ia64) that define ARCH_HAS_GET_REQUIRED_MASK.
> >>
> >> Signed-off-by: David Vrabel <[email protected]>
> >> Reviewed-by: Stefano Stabellini <[email protected]>
> >> ---
> >> drivers/base/platform.c | 10 ++++++++--
> >
> > Is this why you sent this to me? The x86 maintainers should handle this
> > patch set, not me for a tiny 8 lines in just one of the files, sorry.
>
> This series will be merged via the Xen tree, but this patch still needs
> your review or ack.
How about waiting until after the merge window and resending it, asking
for that, instead of making me guess :)
greg k-h