2020-04-15 21:28:30

by Christoph Hellwig

Subject: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

Several IOMMU drivers have a bypass mode where they can use a direct
mapping if the device's DMA mask is large enough. Add generic support
for that to the core dma-mapping code, so that those drivers can be
switched to a common solution.

Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/device.h | 6 ++++
kernel/dma/mapping.c | 70 +++++++++++++++++++++++++++++-------------
2 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index ac8e37cd716a..143c2e0edb19 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -523,6 +523,11 @@ struct dev_links_info {
* sync_state() callback.
* @dma_coherent: this particular device is dma coherent, even if the
* architecture supports non-coherent devices.
+ * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the
+ * streaming DMA operations (->map_* / ->unmap_* / ->sync_*),
+ * and optionally (if the coherent mask is large enough) also
+ * for dma allocations. This flag is managed by the dma ops
+ * instance from ->dma_supported.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
@@ -622,6 +627,7 @@ struct device {
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
bool dma_coherent:1;
#endif
+ bool dma_ops_bypass : 1;
};

static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 8e4155f9ee69..d776fb9f5adb 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -105,9 +105,33 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
EXPORT_SYMBOL(dmam_alloc_attrs);

-static inline bool dma_is_direct(const struct dma_map_ops *ops)
+/*
+ * Check if the device uses a direct mapping for streaming DMA operations.
+ * This allows IOMMU drivers to set a bypass mode if the DMA mask is large
+ * enough.
+ */
+static inline bool dma_alloc_direct(struct device *dev,
+ const struct dma_map_ops *ops)
{
- return likely(!ops);
+ if (likely(!ops))
+ return true;
+ if (!dev->dma_ops_bypass)
+ return false;
+
+ return min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit) >=
+ dma_direct_get_required_mask(dev);
+}
+
+static inline bool dma_map_direct(struct device *dev,
+ const struct dma_map_ops *ops)
+{
+ if (likely(!ops))
+ return true;
+ if (!dev->dma_ops_bypass)
+ return false;
+
+ return min_not_zero(*dev->dma_mask, dev->bus_dma_limit) >=
+ dma_direct_get_required_mask(dev);
}

dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
@@ -118,7 +142,7 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
dma_addr_t addr;

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
else
addr = ops->map_page(dev, page, offset, size, dir, attrs);
@@ -134,7 +158,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_unmap_page(dev, addr, size, dir, attrs);
else if (ops->unmap_page)
ops->unmap_page(dev, addr, size, dir, attrs);
@@ -153,7 +177,7 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
int ents;

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
else
ents = ops->map_sg(dev, sg, nents, dir, attrs);
@@ -172,7 +196,7 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,

BUG_ON(!valid_dma_direction(dir));
debug_dma_unmap_sg(dev, sg, nents, dir);
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
else if (ops->unmap_sg)
ops->unmap_sg(dev, sg, nents, dir, attrs);
@@ -191,7 +215,7 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
return DMA_MAPPING_ERROR;

- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
else if (ops->map_resource)
addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
@@ -207,7 +231,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (!dma_is_direct(ops) && ops->unmap_resource)
+ if (!dma_map_direct(dev, ops) && ops->unmap_resource)
ops->unmap_resource(dev, addr, size, dir, attrs);
debug_dma_unmap_resource(dev, addr, size, dir);
}
@@ -219,7 +243,7 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
ops->sync_single_for_cpu(dev, addr, size, dir);
@@ -233,7 +257,7 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_single_for_device(dev, addr, size, dir);
else if (ops->sync_single_for_device)
ops->sync_single_for_device(dev, addr, size, dir);
@@ -247,7 +271,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
else if (ops->sync_sg_for_cpu)
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
@@ -261,7 +285,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);

BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
else if (ops->sync_sg_for_device)
ops->sync_sg_for_device(dev, sg, nelems, dir);
@@ -302,7 +326,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
{
const struct dma_map_ops *ops = get_dma_ops(dev);

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
@@ -372,7 +396,7 @@ bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_can_mmap(dev);
return ops->mmap != NULL;
}
@@ -397,7 +421,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
const struct dma_map_ops *ops = get_dma_ops(dev);

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
@@ -410,7 +434,7 @@ u64 dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_get_required_mask(dev);
if (ops->get_required_mask)
return ops->get_required_mask(dev);
@@ -441,7 +465,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
else if (ops->alloc)
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
@@ -473,7 +497,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;

debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
else if (ops->free)
ops->free(dev, size, cpu_addr, dma_handle, attrs);
@@ -484,7 +508,11 @@ int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

- if (dma_is_direct(ops))
+ /*
+ * ->dma_supported sets the bypass flag, so we must always call
+ * into the method here unless the device is truly direct mapped.
+ */
+ if (!ops)
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
@@ -540,7 +568,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,

BUG_ON(!valid_dma_direction(dir));

- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
arch_dma_cache_sync(dev, vaddr, size, dir);
else if (ops->cache_sync)
ops->cache_sync(dev, vaddr, size, dir);
@@ -552,7 +580,7 @@ size_t dma_max_mapping_size(struct device *dev)
const struct dma_map_ops *ops = get_dma_ops(dev);
size_t size = SIZE_MAX;

- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
size = dma_direct_max_mapping_size(dev);
else if (ops && ops->max_mapping_size)
size = ops->max_mapping_size(dev);
--
2.25.1


2020-04-18 12:43:44

by Joerg Roedel

Subject: Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

Hi Christoph,

On Tue, Apr 14, 2020 at 02:25:05PM +0200, Christoph Hellwig wrote:
> +static inline bool dma_map_direct(struct device *dev,
> + const struct dma_map_ops *ops)
> +{
> + if (likely(!ops))
> + return true;
> + if (!dev->dma_ops_bypass)
> + return false;
> +
> + return min_not_zero(*dev->dma_mask, dev->bus_dma_limit) >=
> + dma_direct_get_required_mask(dev);

Why is the dma-mask check done here? The dma-direct code handles memory
outside of the device's dma-mask with swiotlb, no?

I also don't quite get what the difference is between setting the
dma_ops_bypass flag non-zero and setting ops to NULL.


Joerg


2020-04-19 08:02:23

by Christoph Hellwig

Subject: Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

On Sat, Apr 18, 2020 at 02:42:05PM +0200, Joerg Roedel wrote:
> Hi Christoph,
>
> On Tue, Apr 14, 2020 at 02:25:05PM +0200, Christoph Hellwig wrote:
> > +static inline bool dma_map_direct(struct device *dev,
> > + const struct dma_map_ops *ops)
> > +{
> > + if (likely(!ops))
> > + return true;
> > + if (!dev->dma_ops_bypass)
> > + return false;
> > +
> > + return min_not_zero(*dev->dma_mask, dev->bus_dma_limit) >=
> > + dma_direct_get_required_mask(dev);
>
> Why is the dma-mask check done here? The dma-direct code handles memory
> outside of the device's dma-mask with swiotlb, no?
>
> I also don't quite get what the difference is between setting the
> dma_ops_bypass flag non-zero and setting ops to NULL.

The difference is that NULL ops imply the direct mapping is always used,
while dma_ops_bypass means a direct mapping is used if no bounce buffering
using swiotlb is needed, which should also answer your first question.
The idea is to consolidate code in the core to use an opportunistic
direct mapping instead of the dynamic iommu mapping. I thought the cover
letter and commit log explained this well enough, but maybe I need to
do a better job.
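
To make that concrete, here is a minimal sketch (illustrative only, not
code from this series; foo_iommu_fits_direct() is a made-up placeholder
for whatever check the IOMMU driver actually does) of how a driver's
->dma_supported would manage the flag:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* made-up placeholder: "can a 1:1 mapping serve this device at this mask?" */
static bool foo_iommu_fits_direct(struct device *dev, u64 mask)
{
        return mask == DMA_BIT_MASK(64);
}

static int foo_iommu_dma_supported(struct device *dev, u64 mask)
{
        /*
         * With the flag set the core uses dma-direct for the streaming
         * calls whenever the device's mask covers all memory, and falls
         * back to the dynamic mapping in these dma_ops when it does not.
         */
        dev->dma_ops_bypass = foo_iommu_fits_direct(dev, mask);
        return 1;
}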

2020-04-19 12:27:15

by Joerg Roedel

Subject: Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device

On Sun, Apr 19, 2020 at 10:00:58AM +0200, Christoph Hellwig wrote:
> The difference is that NULL ops imply the direct mapping is always used,
> while dma_ops_bypass means a direct mapping is used if no bounce buffering
> using swiotlb is needed, which should also answer your first question.
> The idea is to consolidate code in the core to use an opportunistic
> direct mapping instead of the dynamic iommu mapping. I thought the cover
> letter and commit log explained this well enough, but maybe I need to
> do a better job.

Ah right, now I see it: when dma_ops_bypass is set it will only use the
direct mapping when the available memory fits into the device's
dma_masks, and calls into dma_ops otherwise.

I wonder how that will interact with an IOMMU driver, which has to make
sure that the direct mapping is accessible to the device at all. It
can either put the device into a passthrough domain for direct mapping
or into a re-mapped domain, but then all DMA-API calls need to use dma-ops.
When the dma_mask covers available memory but coherent_mask doesn't,
the streaming calls will use dma-direct and alloc_coherent() calls into
dma-ops. There is no way for the IOMMU driver to ensure that both work.

So what are the conditions under which an IOMMU driver would set
dma_ops_bypass to 1 and get a different result as to when setting
dev->dma_ops to NULL?

Regards,

Joerg

2020-04-19 15:13:59

by Alexey Kardashevskiy

Subject: Re: [PATCH 3/4] dma-mapping: add a dma_ops_bypass flag to struct device



On 19/04/2020 22:25, Joerg Roedel wrote:
> On Sun, Apr 19, 2020 at 10:00:58AM +0200, Christoph Hellwig wrote:
>> The difference is that NULL ops imply the direct mapping is always used,
>> while dma_ops_bypass means a direct mapping is used if no bounce buffering
>> using swiotlb is needed, which should also answer your first question.
>> The idea is to consolidate code in the core to use an opportunistic
>> direct mapping instead of the dynamic iommu mapping. I thought the cover
>> letter and commit log explained this well enough, but maybe I need to
>> do a better job.
>
> Ah right, now I see it: when dma_ops_bypass is set it will only use the
> direct mapping when the available memory fits into the device's
> dma_masks, and calls into dma_ops otherwise.
>
> I wonder how that will interact with an IOMMU driver, which has to make
> sure that the direct mapping is accessible to the device at all. It
> can either put the device into a passthrough domain for direct mapping
> or into a re-mapped domain, but then all DMA-API calls need to use dma-ops.
> When the dma_mask covers available memory but coherent_mask doesn't,
> the streaming calls will use dma-direct and alloc_coherent() calls into
> dma-ops. There is no way for the IOMMU driver to ensure that both work.
>
> So what are the conditions under which an IOMMU driver would set
> dma_ops_bypass to 1 and get a different result as to when setting
> dev->dma_ops to NULL?


One example is powerpc64/pseries (arch/powerpc/kernel/dma-iommu.c), where
dma_iommu_ops::dma_iommu_dma_supported() (i.e. ops are needed) decides
whether to set dma_ops_bypass to 1. It tries creating a DMA window with a
1:1 mapping that fits the maximum possible RAM address; if that works,
then the ops are not needed.
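
Very roughly, that decision looks like this (just a sketch, not the
actual powerpc code; foo_max_ram_addr() and foo_create_direct_window()
are made-up stand-ins for the real window-setup machinery):

#include <linux/device.h>
#include <linux/types.h>

/* made-up stand-ins, declared only so the sketch is self-contained */
extern u64 foo_max_ram_addr(void);
extern int foo_create_direct_window(struct device *dev, u64 limit);

static bool foo_can_bypass(struct device *dev, u64 mask)
{
        u64 top = foo_max_ram_addr();   /* highest possible RAM address */

        if (mask < top)                 /* device cannot address all of RAM */
                return false;
        /* try a DMA window with a 1:1 mapping covering [0, top] */
        return foo_create_direct_window(dev, top) == 0;
}

If that succeeds, ->dma_supported can set dev->dma_ops_bypass to true and
the streaming API goes through dma-direct; otherwise the flag stays clear
and everything keeps using the dynamic window through dma_ops.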


--
Alexey