2019-10-25 11:30:44

by Laurentiu Tudor

[permalink] [raw]
Subject: [PATCH v2 0/3] dma-mapping: introduce new dma unmap and sync variants

From: Laurentiu Tudor <[email protected]>

This series introduces a few new dma unmap and sync api variants that,
on top of what the originals do, return the virtual address
corresponding to the input dma address. In order to do that a new dma
map op is added, .get_virt_addr, that takes the input dma address and
returns the virtual address backing it.
The second patch adds an implementation for this new dma map op in the
generic iommu dma glue code and wires it in.
The third patch updates the dpaa2-eth driver to use the new apis.

Context: https://lkml.org/lkml/2019/5/31/684

Changes in v2
* use "dma_unmap_*_desc" names (Robin)
* return va/page instead of phys addr (Robin)

Changes since RFC
* completely changed the approach: added unmap and sync variants that
return the phys addr instead of adding a new dma to phys conversion
function

Laurentiu Tudor (3):
dma-mapping: introduce new dma unmap and sync api variants
iommu/dma: wire-up new dma map op .get_virt_addr
dpaa2_eth: use new unmap and sync dma api variants

drivers/iommu/dma-iommu.c | 16 ++++++
.../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 40 +++++---------
.../net/ethernet/freescale/dpaa2/dpaa2-eth.h | 1 -
include/linux/dma-mapping.h | 55 +++++++++++++++++++
4 files changed, 86 insertions(+), 26 deletions(-)

--
2.17.1


2019-10-25 11:32:00

by Laurentiu Tudor

[permalink] [raw]
Subject: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants

From: Laurentiu Tudor <[email protected]>

Introduce a few new dma unmap and sync variants that, on top of the
original variants, return the virtual address corresponding to the
input dma address.
In order to implement this a new dma map op is added and used:
void *get_virt_addr(dev, dma_handle);
It does the actual conversion of an input dma address to the output
virtual address.

Signed-off-by: Laurentiu Tudor <[email protected]>
---
include/linux/dma-mapping.h | 55 +++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4a1c4fca475a..ae7bb8a84b9d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -132,6 +132,7 @@ struct dma_map_ops {
u64 (*get_required_mask)(struct device *dev);
size_t (*max_mapping_size)(struct device *dev);
unsigned long (*get_merge_boundary)(struct device *dev);
+ void *(*get_virt_addr)(struct device *dev, dma_addr_t dma_handle);
};

#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
@@ -304,6 +305,21 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
debug_dma_unmap_page(dev, addr, size, dir);
}

+static inline struct page *
+dma_unmap_page_attrs_desc(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ void *ptr = NULL;
+
+ if (ops && ops->get_virt_addr)
+ ptr = ops->get_virt_addr(dev, addr);
+
+ dma_unmap_page_attrs(dev, addr, size, dir, attrs);
+
+ return ptr ? virt_to_page(ptr) : NULL;
+}
+
/*
* dma_maps_sg_attrs returns 0 on error and > 0 on success.
* It should never return a value < 0.
@@ -390,6 +406,21 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}

+static inline void *
+dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ void *ptr = NULL;
+
+ if (ops && ops->get_virt_addr)
+ ptr = ops->get_virt_addr(dev, addr);
+
+ dma_sync_single_for_cpu(dev, addr, size, dir);
+
+ return ptr;
+}
+
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
@@ -500,6 +531,12 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
}
+
+static inline void *
+dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
+{
+}
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
@@ -594,6 +631,21 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
}

+static inline void *
+dma_unmap_single_attrs_desc(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ void *ptr = NULL;
+
+ if (ops && ops->get_virt_addr)
+ ptr = ops->get_virt_addr(dev, addr);
+
+ dma_unmap_single_attrs(dev, addr, size, dir, attrs);
+
+ return ptr;
+}
+
static inline void dma_sync_single_range_for_cpu(struct device *dev,
dma_addr_t addr, unsigned long offset, size_t size,
enum dma_data_direction dir)
@@ -610,10 +662,13 @@ static inline void dma_sync_single_range_for_device(struct device *dev,

#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
+#define dma_unmap_single_desc(d, a, s, r) \
+ dma_unmap_single_attrs_desc(d, a, s, r, 0)
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
+#define dma_unmap_page_desc(d, a, s, r) dma_unmap_page_attrs_desc(d, a, s, r, 0)
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)

--
2.17.1

2019-10-25 11:34:01

by Laurentiu Tudor

[permalink] [raw]
Subject: [PATCH v2 2/3] iommu/dma: wire-up new dma map op .get_virt_addr

From: Laurentiu Tudor <[email protected]>

Add an implementation of the newly introduced dma map op in the
generic DMA IOMMU glue layer and wire it up.

Signed-off-by: Laurentiu Tudor <[email protected]>
---
drivers/iommu/dma-iommu.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f321279baf9e..15e76232d697 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1091,6 +1091,21 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}

+static void *iommu_dma_get_virt_addr(struct device *dev, dma_addr_t dma_handle)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+ if (domain) {
+ phys_addr_t phys;
+
+ phys = iommu_iova_to_phys(domain, dma_handle);
+ if (phys)
+ return phys_to_virt(phys);
+ }
+
+ return NULL;
+}
+
static const struct dma_map_ops iommu_dma_ops = {
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
@@ -1107,6 +1122,7 @@ static const struct dma_map_ops iommu_dma_ops = {
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
.get_merge_boundary = iommu_dma_get_merge_boundary,
+ .get_virt_addr = iommu_dma_get_virt_addr,
};

/*
--
2.17.1

2019-10-25 13:34:09

by Laurentiu Tudor

[permalink] [raw]
Subject: [PATCH v2 3/3] dpaa2_eth: use new unmap and sync dma api variants

From: Laurentiu Tudor <[email protected]>

Convert this driver to usage of the newly introduced dma unmap and
sync DMA APIs. This will get rid of the unsupported direct usage of
iommu_iova_to_phys() API.

Signed-off-by: Laurentiu Tudor <[email protected]>
---
.../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 40 +++++++------------
.../net/ethernet/freescale/dpaa2/dpaa2-eth.h | 1 -
2 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 19379bae0144..8c3391e6e598 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -29,16 +29,6 @@ MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Freescale Semiconductor, Inc");
MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver");

-static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
- dma_addr_t iova_addr)
-{
- phys_addr_t phys_addr;
-
- phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
-
- return phys_to_virt(phys_addr);
-}
-
static void validate_rx_csum(struct dpaa2_eth_priv *priv,
u32 fd_status,
struct sk_buff *skb)
@@ -85,9 +75,10 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
sgt = vaddr + dpaa2_fd_get_offset(fd);
for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
addr = dpaa2_sg_get_addr(&sgt[i]);
- sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
- dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
- DMA_BIDIRECTIONAL);
+ sg_vaddr = page_to_virt
+ (dma_unmap_page_desc(dev, addr,
+ DPAA2_ETH_RX_BUF_SIZE,
+ DMA_BIDIRECTIONAL));

free_pages((unsigned long)sg_vaddr, 0);
if (dpaa2_sg_is_final(&sgt[i]))
@@ -143,9 +134,10 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,

/* Get the address and length from the S/G entry */
sg_addr = dpaa2_sg_get_addr(sge);
- sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
- dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
- DMA_BIDIRECTIONAL);
+ sg_vaddr = page_to_virt
+ (dma_unmap_page_desc(dev, sg_addr,
+ DPAA2_ETH_RX_BUF_SIZE,
+ DMA_BIDIRECTIONAL));

sg_length = dpaa2_sg_get_len(sge);

@@ -210,9 +202,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
int i;

for (i = 0; i < count; i++) {
- vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
- dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
- DMA_BIDIRECTIONAL);
+ vaddr = page_to_virt(dma_unmap_page_desc(dev, buf_array[i],
+ DPAA2_ETH_RX_BUF_SIZE,
+ DMA_BIDIRECTIONAL));
free_pages((unsigned long)vaddr, 0);
}
}
@@ -369,9 +361,8 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
/* Tracing point */
trace_dpaa2_rx_fd(priv->net_dev, fd);

- vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
- dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
- DMA_BIDIRECTIONAL);
+ vaddr = dma_sync_single_for_cpu_desc(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+ DMA_BIDIRECTIONAL);

fas = dpaa2_get_fas(vaddr, false);
prefetch(fas);
@@ -682,7 +673,8 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
u32 fd_len = dpaa2_fd_get_len(fd);

fd_addr = dpaa2_fd_get_addr(fd);
- buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
+ buffer_start = dma_sync_single_for_cpu_desc(dev, fd_addr, sizeof(*swa),
+ DMA_BIDIRECTIONAL);
swa = (struct dpaa2_eth_swa *)buffer_start;

if (fd_format == dpaa2_fd_single) {
@@ -3448,8 +3440,6 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
priv = netdev_priv(net_dev);
priv->net_dev = net_dev;

- priv->iommu_domain = iommu_get_domain_for_dev(dev);
-
/* Obtain a MC portal */
err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
&priv->mc_io);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 8a0e65b3267f..4e5183617ebd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -374,7 +374,6 @@ struct dpaa2_eth_priv {

struct fsl_mc_device *dpbp_dev;
u16 bpid;
- struct iommu_domain *iommu_domain;

bool tx_tstamp; /* Tx timestamping enabled */
bool rx_tstamp; /* Rx timestamping enabled */
--
2.17.1

2019-10-25 20:44:03

by Jonathan Lemon

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] dpaa2_eth: use new unmap and sync dma api variants



On 24 Oct 2019, at 5:41, Laurentiu Tudor wrote:

> From: Laurentiu Tudor <[email protected]>
>
> Convert this driver to usage of the newly introduced dma unmap and
> sync DMA APIs. This will get rid of the unsupported direct usage of
> iommu_iova_to_phys() API.
>
> Signed-off-by: Laurentiu Tudor <[email protected]>
> ---
> .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 40
> +++++++------------
> .../net/ethernet/freescale/dpaa2/dpaa2-eth.h | 1 -
> 2 files changed, 15 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> index 19379bae0144..8c3391e6e598 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> @@ -29,16 +29,6 @@ MODULE_LICENSE("Dual BSD/GPL");
> MODULE_AUTHOR("Freescale Semiconductor, Inc");
> MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver");
>
> -static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
> - dma_addr_t iova_addr)
> -{
> - phys_addr_t phys_addr;
> -
> - phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) :
> iova_addr;
> -
> - return phys_to_virt(phys_addr);
> -}
> -
> static void validate_rx_csum(struct dpaa2_eth_priv *priv,
> u32 fd_status,
> struct sk_buff *skb)
> @@ -85,9 +75,10 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
> sgt = vaddr + dpaa2_fd_get_offset(fd);
> for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
> addr = dpaa2_sg_get_addr(&sgt[i]);
> - sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
> - dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> + sg_vaddr = page_to_virt
> + (dma_unmap_page_desc(dev, addr,
> + DPAA2_ETH_RX_BUF_SIZE,
> + DMA_BIDIRECTIONAL));

This is doing virt -> page -> virt. Why not just have the new
function return the VA corresponding to the addr, which would
match the other functions?
--
Jonathan


>
> free_pages((unsigned long)sg_vaddr, 0);
> if (dpaa2_sg_is_final(&sgt[i]))
> @@ -143,9 +134,10 @@ static struct sk_buff *build_frag_skb(struct
> dpaa2_eth_priv *priv,
>
> /* Get the address and length from the S/G entry */
> sg_addr = dpaa2_sg_get_addr(sge);
> - sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
> - dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> + sg_vaddr = page_to_virt
> + (dma_unmap_page_desc(dev, sg_addr,
> + DPAA2_ETH_RX_BUF_SIZE,
> + DMA_BIDIRECTIONAL));
>
> sg_length = dpaa2_sg_get_len(sge);
>
> @@ -210,9 +202,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv,
> u64 *buf_array, int count)
> int i;
>
> for (i = 0; i < count; i++) {
> - vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
> - dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> + vaddr = page_to_virt(dma_unmap_page_desc(dev, buf_array[i],
> + DPAA2_ETH_RX_BUF_SIZE,
> + DMA_BIDIRECTIONAL));
> free_pages((unsigned long)vaddr, 0);
> }
> }
> @@ -369,9 +361,8 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv
> *priv,
> /* Tracing point */
> trace_dpaa2_rx_fd(priv->net_dev, fd);
>
> - vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
> - dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> + vaddr = dma_sync_single_for_cpu_desc(dev, addr,
> DPAA2_ETH_RX_BUF_SIZE,
> + DMA_BIDIRECTIONAL);
>
> fas = dpaa2_get_fas(vaddr, false);
> prefetch(fas);
> @@ -682,7 +673,8 @@ static void free_tx_fd(const struct dpaa2_eth_priv
> *priv,
> u32 fd_len = dpaa2_fd_get_len(fd);
>
> fd_addr = dpaa2_fd_get_addr(fd);
> - buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
> + buffer_start = dma_sync_single_for_cpu_desc(dev, fd_addr,
> sizeof(*swa),
> + DMA_BIDIRECTIONAL);
> swa = (struct dpaa2_eth_swa *)buffer_start;
>
> if (fd_format == dpaa2_fd_single) {
> @@ -3448,8 +3440,6 @@ static int dpaa2_eth_probe(struct fsl_mc_device
> *dpni_dev)
> priv = netdev_priv(net_dev);
> priv->net_dev = net_dev;
>
> - priv->iommu_domain = iommu_get_domain_for_dev(dev);
> -
> /* Obtain a MC portal */
> err = fsl_mc_portal_allocate(dpni_dev,
> FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
> &priv->mc_io);
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
> b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
> index 8a0e65b3267f..4e5183617ebd 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
> @@ -374,7 +374,6 @@ struct dpaa2_eth_priv {
>
> struct fsl_mc_device *dpbp_dev;
> u16 bpid;
> - struct iommu_domain *iommu_domain;
>
> bool tx_tstamp; /* Tx timestamping enabled */
> bool rx_tstamp; /* Rx timestamping enabled */
> --
> 2.17.1

2019-10-28 19:53:42

by Laurentiu Tudor

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] dpaa2_eth: use new unmap and sync dma api variants

Hi Jonathan,

On 25.10.2019 19:12, Jonathan Lemon wrote:
>
>
> On 24 Oct 2019, at 5:41, Laurentiu Tudor wrote:
>
>> From: Laurentiu Tudor <[email protected]>
>>
>> Convert this driver to usage of the newly introduced dma unmap and
>> sync DMA APIs. This will get rid of the unsupported direct usage of
>> iommu_iova_to_phys() API.
>>
>> Signed-off-by: Laurentiu Tudor <[email protected]>
>> ---
>>  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 40 +++++++------------
>>  .../net/ethernet/freescale/dpaa2/dpaa2-eth.h  |  1 -
>>  2 files changed, 15 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
>> b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
>> index 19379bae0144..8c3391e6e598 100644
>> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
>> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
>> @@ -29,16 +29,6 @@ MODULE_LICENSE("Dual BSD/GPL");
>>  MODULE_AUTHOR("Freescale Semiconductor, Inc");
>>  MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver");
>>
>> -static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
>> -                dma_addr_t iova_addr)
>> -{
>> -    phys_addr_t phys_addr;
>> -
>> -    phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) :
>> iova_addr;
>> -
>> -    return phys_to_virt(phys_addr);
>> -}
>> -
>>  static void validate_rx_csum(struct dpaa2_eth_priv *priv,
>>                   u32 fd_status,
>>                   struct sk_buff *skb)
>> @@ -85,9 +75,10 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
>>      sgt = vaddr + dpaa2_fd_get_offset(fd);
>>      for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
>>          addr = dpaa2_sg_get_addr(&sgt[i]);
>> -        sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
>> -        dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
>> -                   DMA_BIDIRECTIONAL);
>> +        sg_vaddr = page_to_virt
>> +                (dma_unmap_page_desc(dev, addr,
>> +                            DPAA2_ETH_RX_BUF_SIZE,
>> +                            DMA_BIDIRECTIONAL));
>
> This is doing virt -> page -> virt.  Why not just have the new
> function return the VA corresponding to the addr, which would
> match the other functions?

I'd really like that as it would get rid of the page_to_virt() calls but
it will break the symmetry with the dma_map_page() API. I'll let the
maintainers decide.

---
Best Regards, Laurentiu

2019-10-28 19:58:17

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] dpaa2_eth: use new unmap and sync dma api variants

On Mon, Oct 28, 2019 at 10:55:05AM +0000, Laurentiu Tudor wrote:
> >> @@ -85,9 +75,10 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
> >>      sgt = vaddr + dpaa2_fd_get_offset(fd);
> >>      for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
> >>          addr = dpaa2_sg_get_addr(&sgt[i]);
> >> -        sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
> >> -        dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
> >> -                   DMA_BIDIRECTIONAL);
> >> +        sg_vaddr = page_to_virt
> >> +                (dma_unmap_page_desc(dev, addr,
> >> +                            DPAA2_ETH_RX_BUF_SIZE,
> >> +                            DMA_BIDIRECTIONAL));
> >
> > This is doing virt -> page -> virt. Why not just have the new
> > function return the VA corresponding to the addr, which would
> > match the other functions?
>
> I'd really like that as it would get rid of the page_to_virt() calls but
> it will break the symmetry with the dma_map_page() API. I'll let the
> maintainers decide.

It would be symmetric with dma_map_single, though. Maybe we need
both variants?

2019-10-28 20:43:51

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants

On 24/10/2019 13:41, Laurentiu Tudor wrote:
> From: Laurentiu Tudor <[email protected]>
>
> Introduce a few new dma unmap and sync variants that, on top of the
> original variants, return the virtual address corresponding to the
> input dma address.
> In order to implement this a new dma map op is added and used:
> void *get_virt_addr(dev, dma_handle);
> It does the actual conversion of an input dma address to the output
> virtual address.

At this point, I think it might be better to just change the prototype
of the .unmap_page/.sync_single_for_cpu callbacks themselves. In cases
where .get_virt_addr would be non-trivial, it's most likely duplicating
work that the relevant callback has to do anyway (i.e. where the virtual
and/or physical address is needed internally for a cache maintenance or
bounce buffer operation). It would also help avoid any possible
ambiguity about whether .get_virt_addr returns the VA corresponding
dma_handle (if one exists) rather than the VA of the buffer *mapped to*
dma_handle, which for a bounce-buffering implementation would be
different, and the one you actually need - a naive
phys_to_virt(dma_to_phys(dma_handle)) would lead you to the wrong place
(in fact it looks like DPAA2 would currently go wrong with
"swiotlb=force" and the SMMU disabled or in passthrough).

One question there is whether we'd want careful special-casing to avoid
introducing overhead where unmap/sync are currently complete no-ops, or
whether an extra phys_to_virt() or so in those paths would be tolerable.

> Signed-off-by: Laurentiu Tudor <[email protected]>
> ---
> include/linux/dma-mapping.h | 55 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 55 insertions(+)
>
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a1c4fca475a..ae7bb8a84b9d 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -132,6 +132,7 @@ struct dma_map_ops {
> u64 (*get_required_mask)(struct device *dev);
> size_t (*max_mapping_size)(struct device *dev);
> unsigned long (*get_merge_boundary)(struct device *dev);
> + void *(*get_virt_addr)(struct device *dev, dma_addr_t dma_handle);
> };
>
> #define DMA_MAPPING_ERROR (~(dma_addr_t)0)
> @@ -304,6 +305,21 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
> debug_dma_unmap_page(dev, addr, size, dir);
> }
>
> +static inline struct page *
> +dma_unmap_page_attrs_desc(struct device *dev, dma_addr_t addr, size_t size,
> + enum dma_data_direction dir, unsigned long attrs)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> + void *ptr = NULL;
> +
> + if (ops && ops->get_virt_addr)
> + ptr = ops->get_virt_addr(dev, addr);

Note that this doesn't work for dma-direct, but for the sake of arm64 at
least it almost certainly wants to.

Robin.

> + dma_unmap_page_attrs(dev, addr, size, dir, attrs);
> +
> + return ptr ? virt_to_page(ptr) : NULL;
> +}
> +
> /*
> * dma_maps_sg_attrs returns 0 on error and > 0 on success.
> * It should never return a value < 0.
> @@ -390,6 +406,21 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> debug_dma_sync_single_for_cpu(dev, addr, size, dir);
> }
>
> +static inline void *
> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr, size_t size,
> + enum dma_data_direction dir)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> + void *ptr = NULL;
> +
> + if (ops && ops->get_virt_addr)
> + ptr = ops->get_virt_addr(dev, addr);
> +
> + dma_sync_single_for_cpu(dev, addr, size, dir);
> +
> + return ptr;
> +}
> +
> static inline void dma_sync_single_for_device(struct device *dev,
> dma_addr_t addr, size_t size,
> enum dma_data_direction dir)
> @@ -500,6 +531,12 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> size_t size, enum dma_data_direction dir)
> {
> }
> +
> +static inline void *
> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr, size_t size,
> + enum dma_data_direction dir)
> +{
> +}
> static inline void dma_sync_single_for_device(struct device *dev,
> dma_addr_t addr, size_t size, enum dma_data_direction dir)
> {
> @@ -594,6 +631,21 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
> return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
> }
>
> +static inline void *
> +dma_unmap_single_attrs_desc(struct device *dev, dma_addr_t addr, size_t size,
> + enum dma_data_direction dir, unsigned long attrs)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> + void *ptr = NULL;
> +
> + if (ops && ops->get_virt_addr)
> + ptr = ops->get_virt_addr(dev, addr);
> +
> + dma_unmap_single_attrs(dev, addr, size, dir, attrs);
> +
> + return ptr;
> +}
> +
> static inline void dma_sync_single_range_for_cpu(struct device *dev,
> dma_addr_t addr, unsigned long offset, size_t size,
> enum dma_data_direction dir)
> @@ -610,10 +662,13 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
>
> #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
> #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
> +#define dma_unmap_single_desc(d, a, s, r) \
> + dma_unmap_single_attrs_desc(d, a, s, r, 0)
> #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
> #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
> #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
> #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
> +#define dma_unmap_page_desc(d, a, s, r) dma_unmap_page_attrs_desc(d, a, s, r, 0)
> #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
> #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
>
>

2019-10-29 01:46:37

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants

On Thu, Oct 24, 2019 at 12:41:41PM +0000, Laurentiu Tudor wrote:
> From: Laurentiu Tudor <[email protected]>
>
> Introduce a few new dma unmap and sync variants that, on top of the
> original variants, return the virtual address corresponding to the
> input dma address.
> In order to implement this a new dma map op is added and used:
> void *get_virt_addr(dev, dma_handle);
> It does the actual conversion of an input dma address to the output
> virtual address.

We'll definitively need an implementation for dma-direct at least as
well. Also as said previously we need a dma_can_unmap_by_dma_addr()
or similar helper that tells the driver beforehand if this works, so
that the driver can either use a sub-optimal workaround or fail the
probe if this functionality isn't implemented.

2019-10-29 02:14:19

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] iommu/dma: wire-up new dma map op .get_virt_addr

On 24/10/2019 13:41, Laurentiu Tudor wrote:
> From: Laurentiu Tudor <[email protected]>
>
> Add an implementation of the newly introduced dma map op in the
> generic DMA IOMMU glue layer and wire it up.
>
> Signed-off-by: Laurentiu Tudor <[email protected]>
> ---
> drivers/iommu/dma-iommu.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index f321279baf9e..15e76232d697 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1091,6 +1091,21 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
> return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
> }
>
> +static void *iommu_dma_get_virt_addr(struct device *dev, dma_addr_t dma_handle)
> +{
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

Note that we'd generally use the iommu_get_dma_domain() fastpath...

> +
> + if (domain) {

...wherein we can also assume that the device having iommu_dma_ops
assigned at all implies that a DMA ops domain is in place.

Robin.

> + phys_addr_t phys;
> +
> + phys = iommu_iova_to_phys(domain, dma_handle);
> + if (phys)
> + return phys_to_virt(phys);
> + }
> +
> + return NULL;
> +}
> +
> static const struct dma_map_ops iommu_dma_ops = {
> .alloc = iommu_dma_alloc,
> .free = iommu_dma_free,
> @@ -1107,6 +1122,7 @@ static const struct dma_map_ops iommu_dma_ops = {
> .map_resource = iommu_dma_map_resource,
> .unmap_resource = iommu_dma_unmap_resource,
> .get_merge_boundary = iommu_dma_get_merge_boundary,
> + .get_virt_addr = iommu_dma_get_virt_addr,
> };
>
> /*
>

2019-10-29 09:54:27

by Laurentiu Tudor

[permalink] [raw]
Subject: RE: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants



> -----Original Message-----
> From: [email protected] <[email protected]>
> Sent: Monday, October 28, 2019 2:38 PM
>
> On Thu, Oct 24, 2019 at 12:41:41PM +0000, Laurentiu Tudor wrote:
> > From: Laurentiu Tudor <[email protected]>
> >
> > Introduce a few new dma unmap and sync variants that, on top of the
> > original variants, return the virtual address corresponding to the
> > input dma address.
> > In order to implement this a new dma map op is added and used:
> > void *get_virt_addr(dev, dma_handle);
> > It does the actual conversion of an input dma address to the output
> > virtual address.
>
> We'll definitively need an implementation for dma-direct at least as
> well. Also as said previously we need a dma_can_unmap_by_dma_addr()
> or similar helper that tells the driver beforehand if this works, so
> that the driver can either use a sub-optimal workaround or fail the
> probe if this functionality isn't implemented.

Alright. On top of that I need to make this work on booke ppc as we have one driver that runs both on arm and ppc and will use these APIs.

---
Best Regards, Laurentiu

2019-10-30 09:53:07

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants

Hi Laurentiu,

I love your patch! Yet something to improve:

[auto build test ERROR on net-next/master]
[also build test ERROR on v5.4-rc5 next-20191029]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url: https://github.com/0day-ci/linux/commits/Laurentiu-Tudor/dma-mapping-introduce-new-dma-unmap-and-sync-variants/20191027-173418
base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 503a64635d5ef7351657c78ad77f8b5ff658d5fc
config: um-x86_64_defconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=um SUBARCH=x86_64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from include/linux/skbuff.h:31:0,
from include/net/net_namespace.h:38,
from include/linux/inet.h:42,
from include/linux/sunrpc/msg_prot.h:204,
from include/linux/sunrpc/auth.h:16,
from include/linux/nfs_fs.h:31,
from init/do_mounts.c:23:
include/linux/dma-mapping.h: In function 'dma_sync_single_for_cpu_desc':
include/linux/dma-mapping.h:539:1: warning: no return statement in function returning non-void [-Wreturn-type]
}
^
include/linux/dma-mapping.h: In function 'dma_unmap_single_attrs_desc':
>> include/linux/dma-mapping.h:638:34: error: implicit declaration of function 'get_dma_ops'; did you mean 'get_mm_rss'? [-Werror=implicit-function-declaration]
const struct dma_map_ops *ops = get_dma_ops(dev);
^~~~~~~~~~~
get_mm_rss
include/linux/dma-mapping.h:638:34: warning: initialization makes pointer from integer without a cast [-Wint-conversion]
cc1: some warnings being treated as errors

vim +638 include/linux/dma-mapping.h

534
535 static inline void *
536 dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr, size_t size,
537 enum dma_data_direction dir)
538 {
> 539 }
540 static inline void dma_sync_single_for_device(struct device *dev,
541 dma_addr_t addr, size_t size, enum dma_data_direction dir)
542 {
543 }
544 static inline void dma_sync_sg_for_cpu(struct device *dev,
545 struct scatterlist *sg, int nelems, enum dma_data_direction dir)
546 {
547 }
548 static inline void dma_sync_sg_for_device(struct device *dev,
549 struct scatterlist *sg, int nelems, enum dma_data_direction dir)
550 {
551 }
552 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
553 {
554 return -ENOMEM;
555 }
556 static inline void *dma_alloc_attrs(struct device *dev, size_t size,
557 dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
558 {
559 return NULL;
560 }
561 static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
562 dma_addr_t dma_handle, unsigned long attrs)
563 {
564 }
565 static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
566 dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
567 {
568 return NULL;
569 }
570 static inline void dmam_free_coherent(struct device *dev, size_t size,
571 void *vaddr, dma_addr_t dma_handle)
572 {
573 }
574 static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
575 enum dma_data_direction dir)
576 {
577 }
578 static inline int dma_get_sgtable_attrs(struct device *dev,
579 struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr,
580 size_t size, unsigned long attrs)
581 {
582 return -ENXIO;
583 }
584 static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
585 void *cpu_addr, dma_addr_t dma_addr, size_t size,
586 unsigned long attrs)
587 {
588 return -ENXIO;
589 }
590 static inline bool dma_can_mmap(struct device *dev)
591 {
592 return false;
593 }
594 static inline int dma_supported(struct device *dev, u64 mask)
595 {
596 return 0;
597 }
598 static inline int dma_set_mask(struct device *dev, u64 mask)
599 {
600 return -EIO;
601 }
602 static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
603 {
604 return -EIO;
605 }
606 static inline u64 dma_get_required_mask(struct device *dev)
607 {
608 return 0;
609 }
610 static inline size_t dma_max_mapping_size(struct device *dev)
611 {
612 return 0;
613 }
614 static inline unsigned long dma_get_merge_boundary(struct device *dev)
615 {
616 return 0;
617 }
618 #endif /* CONFIG_HAS_DMA */
619
620 static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
621 size_t size, enum dma_data_direction dir, unsigned long attrs)
622 {
623 debug_dma_map_single(dev, ptr, size);
624 return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr),
625 size, dir, attrs);
626 }
627
628 static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
629 size_t size, enum dma_data_direction dir, unsigned long attrs)
630 {
631 return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
632 }
633
634 static inline void *
635 dma_unmap_single_attrs_desc(struct device *dev, dma_addr_t addr, size_t size,
636 enum dma_data_direction dir, unsigned long attrs)
637 {
> 638 const struct dma_map_ops *ops = get_dma_ops(dev);
639 void *ptr = NULL;
640
641 if (ops && ops->get_virt_addr)
642 ptr = ops->get_virt_addr(dev, addr);
643
644 dma_unmap_single_attrs(dev, addr, size, dir, attrs);
645
646 return ptr;
647 }
648

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (6.20 kB)
.config.gz (8.13 kB)
Download all attachments

2019-11-06 11:20:33

by Laurentiu Tudor

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] dpaa2_eth: use new unmap and sync dma api variants


On 28.10.2019 13:38, [email protected] wrote:
> On Mon, Oct 28, 2019 at 10:55:05AM +0000, Laurentiu Tudor wrote:
>>>> @@ -85,9 +75,10 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
>>>>      sgt = vaddr + dpaa2_fd_get_offset(fd);
>>>>      for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
>>>>          addr = dpaa2_sg_get_addr(&sgt[i]);
>>>> -        sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
>>>> -        dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
>>>> -                   DMA_BIDIRECTIONAL);
>>>> +        sg_vaddr = page_to_virt
>>>> +                (dma_unmap_page_desc(dev, addr,
>>>> +                            DPAA2_ETH_RX_BUF_SIZE,
>>>> +                            DMA_BIDIRECTIONAL));
>>>
>>> This is doing virt -> page -> virt.  Why not just have the new
>>> function return the VA corresponding to the addr, which would
>>> match the other functions?
>>
>> I'd really like that as it would get rid of the page_to_virt() calls but
>> it will break the symmetry with the dma_map_page() API. I'll let the
>> maintainers decide.
>
> It would be symmetric with dma_map_single, though. Maybe we need
> both variants?

Patch 1/3 also adds a dma_unmap_single_desc(). Would it be legal to
just use dma_unmap_single_desc() in the driver even if the driver does
its mappings with dma_map_page()?

---
Best Regards, Laurentiu

2019-11-06 11:35:55

by Laurentiu Tudor

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants


On 28.10.2019 15:42, Robin Murphy wrote:
> On 24/10/2019 13:41, Laurentiu Tudor wrote:
>> From: Laurentiu Tudor <[email protected]>
>>
>> Introduce a few new dma unmap and sync variants that, on top of the
>> original variants, return the virtual address corresponding to the
>> input dma address.
>> In order to implement this a new dma map op is added and used:
>>      void *get_virt_addr(dev, dma_handle);
>> It does the actual conversion of an input dma address to the output
>> virtual address.
>
> At this point, I think it might be better to just change the prototype
> of the .unmap_page/.sync_single_for_cpu callbacks themselves.

I could give this a try. At a first sight, looks like it will be quite
an intrusive change.

> In cases
> where .get_virt_addr would be non-trivial, it's most likely duplicating
> work that the relevant callback has to do anyway (i.e. where the virtual
> and/or physical address is needed internally for a cache maintenance or
> bounce buffer operation). It would also help avoid any possible
> ambiguity about whether .get_virt_addr returns the VA corresponding to
> dma_handle (if one exists) rather than the VA of the buffer *mapped to*
> dma_handle, which for a bounce-buffering implementation would be
> different, and the one you actually need - a naive
> phys_to_virt(dma_to_phys(dma_handle)) would lead you to the wrong place


> (in fact it looks like DPAA2 would currently go wrong with
> "swiotlb=force" and the SMMU disabled or in passthrough).

Yes, most likely.

> One question there is whether we'd want careful special-casing to avoid
> introducing overhead where unmap/sync are currently complete no-ops, or
> whether an extra phys_to_virt() or so in those paths would be tolerable.
>
>> Signed-off-by: Laurentiu Tudor <[email protected]>
>> ---
>>   include/linux/dma-mapping.h | 55 +++++++++++++++++++++++++++++++++++++
>>   1 file changed, 55 insertions(+)
>>
>> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
>> index 4a1c4fca475a..ae7bb8a84b9d 100644
>> --- a/include/linux/dma-mapping.h
>> +++ b/include/linux/dma-mapping.h
>> @@ -132,6 +132,7 @@ struct dma_map_ops {
>>       u64 (*get_required_mask)(struct device *dev);
>>       size_t (*max_mapping_size)(struct device *dev);
>>       unsigned long (*get_merge_boundary)(struct device *dev);
>> +    void *(*get_virt_addr)(struct device *dev, dma_addr_t dma_handle);
>>   };
>>   #define DMA_MAPPING_ERROR        (~(dma_addr_t)0)
>> @@ -304,6 +305,21 @@ static inline void dma_unmap_page_attrs(struct
>> device *dev, dma_addr_t addr,
>>       debug_dma_unmap_page(dev, addr, size, dir);
>>   }
>> +static inline struct page *
>> +dma_unmap_page_attrs_desc(struct device *dev, dma_addr_t addr, size_t
>> size,
>> +              enum dma_data_direction dir, unsigned long attrs)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>
> Note that this doesn't work for dma-direct, but for the sake of arm64 at
> least it almost certainly wants to.
>

Will take care of it.

---
Best Regards, Laurentiu

>
>> +    dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>> +
>> +    return ptr ? virt_to_page(ptr) : NULL;
>> +}
>> +
>>   /*
>>    * dma_maps_sg_attrs returns 0 on error and > 0 on success.
>>    * It should never return a value < 0.
>> @@ -390,6 +406,21 @@ static inline void dma_sync_single_for_cpu(struct
>> device *dev, dma_addr_t addr,
>>       debug_dma_sync_single_for_cpu(dev, addr, size, dir);
>>   }
>> +static inline void *
>> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                 enum dma_data_direction dir)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>> +
>> +    dma_sync_single_for_cpu(dev, addr, size, dir);
>> +
>> +    return ptr;
>> +}
>> +
>>   static inline void dma_sync_single_for_device(struct device *dev,
>>                             dma_addr_t addr, size_t size,
>>                             enum dma_data_direction dir)
>> @@ -500,6 +531,12 @@ static inline void dma_sync_single_for_cpu(struct
>> device *dev, dma_addr_t addr,
>>           size_t size, enum dma_data_direction dir)
>>   {
>>   }
>> +
>> +static inline void *
>> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                 enum dma_data_direction dir)
>> +{
>> +}
>>   static inline void dma_sync_single_for_device(struct device *dev,
>>           dma_addr_t addr, size_t size, enum dma_data_direction dir)
>>   {
>> @@ -594,6 +631,21 @@ static inline void dma_unmap_single_attrs(struct
>> device *dev, dma_addr_t addr,
>>       return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>   }
>> +static inline void *
>> +dma_unmap_single_attrs_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                enum dma_data_direction dir, unsigned long attrs)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>> +
>> +    dma_unmap_single_attrs(dev, addr, size, dir, attrs);
>> +
>> +    return ptr;
>> +}
>> +
>>   static inline void dma_sync_single_range_for_cpu(struct device *dev,
>>           dma_addr_t addr, unsigned long offset, size_t size,
>>           enum dma_data_direction dir)
>> @@ -610,10 +662,13 @@ static inline void
>> dma_sync_single_range_for_device(struct device *dev,
>>   #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
>>   #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s,
>> r, 0)
>> +#define dma_unmap_single_desc(d, a, s, r) \
>> +        dma_unmap_single_attrs_desc(d, a, s, r, 0)
>>   #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
>>   #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
>>   #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s,
>> r, 0)
>>   #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
>> +#define dma_unmap_page_desc(d, a, s, r) dma_unmap_page_attrs_desc(d,
>> a, s, r, 0)
>>   #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t,
>> v, h, s, 0)
>>   #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h,
>> s, 0)
>>

2019-11-07 12:32:08

by Laurentiu Tudor

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] dma-mapping: introduce new dma unmap and sync api variants

Hi Robin,

On 28.10.2019 15:42, Robin Murphy wrote:
> On 24/10/2019 13:41, Laurentiu Tudor wrote:
>> From: Laurentiu Tudor <[email protected]>
>>
>> Introduce a few new dma unmap and sync variants that, on top of the
>> original variants, return the virtual address corresponding to the
>> input dma address.
>> In order to implement this a new dma map op is added and used:
>>      void *get_virt_addr(dev, dma_handle);
>> It does the actual conversion of an input dma address to the output
>> virtual address.
>
> At this point, I think it might be better to just change the prototype
> of the .unmap_page/.sync_single_for_cpu callbacks themselves. In cases
> where .get_virt_addr would be non-trivial, it's most likely duplicating
> work that the relevant callback has to do anyway (i.e. where the virtual
> and/or physical address is needed internally for a cache maintenance or
> bounce buffer operation).

Looking in the generic dma-iommu, I didn't see any means of freely
getting the pa or va backing the iova so I can't think of a way of doing
this without adding a call to iommu_iova_to_phys() somewhere in the
unmap op implementation. Obviously, this would come with an overhead
that will probably upset people.
At the moment I can't think of an option other than the initial one,
that is adding the .get_virt_addr op. Please let me know your opinions
on this.

---
Thanks & Best Regards, Laurentiu

> It would also help avoid any possible
> ambiguity about whether .get_virt_addr returns the VA corresponding to
> dma_handle (if one exists) rather than the VA of the buffer *mapped to*
> dma_handle, which for a bounce-buffering implementation would be
> different, and the one you actually need - a naive
> phys_to_virt(dma_to_phys(dma_handle)) would lead you to the wrong place
> (in fact it looks like DPAA2 would currently go wrong with
> "swiotlb=force" and the SMMU disabled or in passthrough).
>
> One question there is whether we'd want careful special-casing to avoid
> introducing overhead where unmap/sync are currently complete no-ops, or
> whether an extra phys_to_virt() or so in those paths would be tolerable.
>
>> Signed-off-by: Laurentiu Tudor <[email protected]>
>> ---
>>   include/linux/dma-mapping.h | 55 +++++++++++++++++++++++++++++++++++++
>>   1 file changed, 55 insertions(+)
>>
>> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
>> index 4a1c4fca475a..ae7bb8a84b9d 100644
>> --- a/include/linux/dma-mapping.h
>> +++ b/include/linux/dma-mapping.h
>> @@ -132,6 +132,7 @@ struct dma_map_ops {
>>       u64 (*get_required_mask)(struct device *dev);
>>       size_t (*max_mapping_size)(struct device *dev);
>>       unsigned long (*get_merge_boundary)(struct device *dev);
>> +    void *(*get_virt_addr)(struct device *dev, dma_addr_t dma_handle);
>>   };
>>   #define DMA_MAPPING_ERROR        (~(dma_addr_t)0)
>> @@ -304,6 +305,21 @@ static inline void dma_unmap_page_attrs(struct
>> device *dev, dma_addr_t addr,
>>       debug_dma_unmap_page(dev, addr, size, dir);
>>   }
>> +static inline struct page *
>> +dma_unmap_page_attrs_desc(struct device *dev, dma_addr_t addr, size_t
>> size,
>> +              enum dma_data_direction dir, unsigned long attrs)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>
> Note that this doesn't work for dma-direct, but for the sake of arm64 at
> least it almost certainly wants to.
>
> Robin.
>
>> +    dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>> +
>> +    return ptr ? virt_to_page(ptr) : NULL;
>> +}
>> +
>>   /*
>>    * dma_maps_sg_attrs returns 0 on error and > 0 on success.
>>    * It should never return a value < 0.
>> @@ -390,6 +406,21 @@ static inline void dma_sync_single_for_cpu(struct
>> device *dev, dma_addr_t addr,
>>       debug_dma_sync_single_for_cpu(dev, addr, size, dir);
>>   }
>> +static inline void *
>> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                 enum dma_data_direction dir)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>> +
>> +    dma_sync_single_for_cpu(dev, addr, size, dir);
>> +
>> +    return ptr;
>> +}
>> +
>>   static inline void dma_sync_single_for_device(struct device *dev,
>>                             dma_addr_t addr, size_t size,
>>                             enum dma_data_direction dir)
>> @@ -500,6 +531,12 @@ static inline void dma_sync_single_for_cpu(struct
>> device *dev, dma_addr_t addr,
>>           size_t size, enum dma_data_direction dir)
>>   {
>>   }
>> +
>> +static inline void *
>> +dma_sync_single_for_cpu_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                 enum dma_data_direction dir)
>> +{
>> +}
>>   static inline void dma_sync_single_for_device(struct device *dev,
>>           dma_addr_t addr, size_t size, enum dma_data_direction dir)
>>   {
>> @@ -594,6 +631,21 @@ static inline void dma_unmap_single_attrs(struct
>> device *dev, dma_addr_t addr,
>>       return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>>   }
>> +static inline void *
>> +dma_unmap_single_attrs_desc(struct device *dev, dma_addr_t addr,
>> size_t size,
>> +                enum dma_data_direction dir, unsigned long attrs)
>> +{
>> +    const struct dma_map_ops *ops = get_dma_ops(dev);
>> +    void *ptr = NULL;
>> +
>> +    if (ops && ops->get_virt_addr)
>> +        ptr = ops->get_virt_addr(dev, addr);
>> +
>> +    dma_unmap_single_attrs(dev, addr, size, dir, attrs);
>> +
>> +    return ptr;
>> +}
>> +
>>   static inline void dma_sync_single_range_for_cpu(struct device *dev,
>>           dma_addr_t addr, unsigned long offset, size_t size,
>>           enum dma_data_direction dir)
>> @@ -610,10 +662,13 @@ static inline void
>> dma_sync_single_range_for_device(struct device *dev,
>>   #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
>>   #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s,
>> r, 0)
>> +#define dma_unmap_single_desc(d, a, s, r) \
>> +        dma_unmap_single_attrs_desc(d, a, s, r, 0)
>>   #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
>>   #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
>>   #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s,
>> r, 0)
>>   #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
>> +#define dma_unmap_page_desc(d, a, s, r) dma_unmap_page_attrs_desc(d,
>> a, s, r, 0)
>>   #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t,
>> v, h, s, 0)
>>   #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h,
>> s, 0)
>>