2019-01-19 00:12:02

by Logan Gunthorpe

Subject: [PATCH] NTB: ntb_transport: Ensure the destination buffer is mapped for TX DMA

Presently, when ntb_transport is used with DMA and the IOMMU turned on,
it fails with errors from the IOMMU such as:

DMAR: DRHD: handling fault status reg 202
DMAR: [DMA Write] Request device [00:04.0] fault addr
381fc0340000 [fault reason 05] PTE Write access is not set

This is because ntb_transport does not map the BAR space with the IOMMU.

To fix this, we map the entire MW region for each QP with
dma_map_resource() after we assign the DMA channel. This avoids the
need for an extra DMA mapping in the fast path.
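
The pattern, roughly, is the following (a minimal sketch, not the
exact hunks below; chan, bar_phys and bar_size are illustrative
placeholders, while dma_map_resource(), dma_mapping_error() and
dma_unmap_resource() are the actual DMA API calls used):

	dma_addr_t dma_addr;

	/* Map the BAR region through the IOMMU for the DMA device */
	dma_addr = dma_map_resource(chan->device->dev, bar_phys, bar_size,
				    DMA_FROM_DEVICE, 0);
	if (dma_mapping_error(chan->device->dev, dma_addr))
		return -EIO;

	/* ... use dma_addr as the DMA engine's destination address ... */

	/* Tear down the mapping when the channel is released */
	dma_unmap_resource(chan->device->dev, dma_addr, bar_size,
			   DMA_FROM_DEVICE, 0);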

Link: https://lore.kernel.org/linux-pci/[email protected]/
Signed-off-by: Logan Gunthorpe <[email protected]>
Cc: Jon Mason <[email protected]>
Cc: Dave Jiang <[email protected]>
Cc: Allen Hubbe <[email protected]>
---
drivers/ntb/ntb_transport.c | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 3bfdb4562408..526b65afc16a 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -144,7 +144,9 @@ struct ntb_transport_qp {
struct list_head tx_free_q;
spinlock_t ntb_tx_free_q_lock;
void __iomem *tx_mw;
- dma_addr_t tx_mw_phys;
+ phys_addr_t tx_mw_phys;
+ size_t tx_mw_size;
+ dma_addr_t tx_mw_dma_addr;
unsigned int tx_index;
unsigned int tx_max_entry;
unsigned int tx_max_frame;
@@ -1049,6 +1051,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
tx_size = (unsigned int)mw_size / num_qps_mw;
qp_offset = tx_size * (qp_num / mw_count);

+ qp->tx_mw_size = tx_size;
qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
if (!qp->tx_mw)
return -EINVAL;
@@ -1644,7 +1647,7 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
dma_cookie_t cookie;

device = chan->device;
- dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+ dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index;
buff_off = (size_t)buf & ~PAGE_MASK;
dest_off = (size_t)dest & ~PAGE_MASK;

@@ -1863,6 +1866,18 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->rx_dma_chan = NULL;
}

+ if (qp->tx_dma_chan) {
+ qp->tx_mw_dma_addr =
+ dma_map_resource(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_phys, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
+ if (dma_mapping_error(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_dma_addr)) {
+ qp->tx_mw_dma_addr = 0;
+ goto err1;
+ }
+ }
+
dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
qp->tx_dma_chan ? "DMA" : "CPU");

@@ -1904,6 +1919,10 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->rx_alloc_entry = 0;
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
+ if (qp->tx_mw_dma_addr)
+ dma_unmap_resource(qp->tx_dma_chan->device->dev,
+ qp->tx_mw_dma_addr, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
if (qp->tx_dma_chan)
dma_release_channel(qp->tx_dma_chan);
if (qp->rx_dma_chan)
@@ -1945,6 +1964,11 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
*/
dma_sync_wait(chan, qp->last_cookie);
dmaengine_terminate_all(chan);
+
+ dma_unmap_resource(chan->device->dev,
+ qp->tx_mw_dma_addr, qp->tx_mw_size,
+ DMA_FROM_DEVICE, 0);
+
dma_release_channel(chan);
}

--
2.19.0
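
For context, once the window is mapped, the TX fast path can hand the
pre-mapped address straight to the DMA engine with no per-packet
dma_map_resource() call. A simplified sketch of that flow follows
(ntb_tx_sketch() is a hypothetical helper; the real logic, including
source-buffer mapping and completion handling, lives in
ntb_async_tx_submit()):

	static int ntb_tx_sketch(struct ntb_transport_qp *qp,
				 struct dma_chan *chan, dma_addr_t src,
				 size_t len, unsigned int tx_index)
	{
		struct dma_async_tx_descriptor *txd;
		dma_cookie_t cookie;
		dma_addr_t dest;

		/* Offset into the already-mapped memory window */
		dest = qp->tx_mw_dma_addr + qp->tx_max_frame * tx_index;

		txd = chan->device->device_prep_dma_memcpy(chan, dest, src,
							   len,
							   DMA_PREP_INTERRUPT);
		if (!txd)
			return -ENOMEM;

		cookie = dmaengine_submit(txd);
		if (dma_submit_error(cookie))
			return -EIO;

		dma_async_issue_pending(chan);
		return 0;
	}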



2019-01-19 00:26:44

by Dave Jiang

Subject: Re: [PATCH] NTB: ntb_transport: Ensure the destination buffer is mapped for TX DMA



On 1/18/19 5:10 PM, Logan Gunthorpe wrote:
> Presently, when ntb_transport is used with DMA and the IOMMU turned on,
> it fails with errors from the IOMMU such as:
>
> DMAR: DRHD: handling fault status reg 202
> DMAR: [DMA Write] Request device [00:04.0] fault addr
> 381fc0340000 [fault reason 05] PTE Write access is not set
>
> This is because ntb_transport does not map the BAR space with the IOMMU.
>
> To fix this, we map the entire MW region for each QP with
> dma_map_resource() after we assign the DMA channel. This avoids the
> need for an extra DMA mapping in the fast path.
>
> Link: https://lore.kernel.org/linux-pci/[email protected]/
> Signed-off-by: Logan Gunthorpe <[email protected]>
> Cc: Jon Mason <[email protected]>
> Cc: Dave Jiang <[email protected]>
> Cc: Allen Hubbe <[email protected]>

Nice! I actually never encountered this on the Intel NTB with the
IOMMU on. It could also be that the Intel BIOS already took care of it
for all embedded device BARs on the uncore. Nevertheless, it's needed.
Thanks!

Reviewed-by: Dave Jiang <[email protected]>


2019-02-11 15:56:31

by Jon Mason

Subject: Re: [PATCH] NTB: ntb_transport: Ensure the destination buffer is mapped for TX DMA

On Fri, Jan 18, 2019 at 05:25:20PM -0700, Dave Jiang wrote:
>
>
> On 1/18/19 5:10 PM, Logan Gunthorpe wrote:
> > [snip]
>
> Nice! I actually never encountered this on the Intel NTB with the
> IOMMU on. It could also be that the Intel BIOS already took care of
> it for all embedded device BARs on the uncore. Nevertheless, it's
> needed. Thanks!
>
> Reviewed-by: Dave Jiang <[email protected]>

Added to the ntb branch, thanks!
