2021-10-29 07:04:31

by Shunsuke Mie

Subject: [RFC PATCH v3 0/2] RDMA/rxe: Add dma-buf support

This patch series adds dma-buf support to the rxe driver.

Dma-buf based memory registration was introduced to make it possible to
use memory regions that lack associated page structures (e.g. device
memory and CMA-managed memory) [1]. However, each RDMA device driver
needs additional implementation to use dma-buf based memory. The rxe
driver does not support it yet.

[1] https://www.spinics.net/lists/linux-rdma/msg98592.html

This patch series adds the changes and implementation needed to use
dma-buf memory with the rxe RDMA device.

This series consists of two patches. The first patch changes the IB core
to support RDMA drivers that have no DMA device. The second patch adds
dma-buf support to the rxe driver.

Related user space RDMA library changes are provided as a separate patch.
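
For readers unfamiliar with where a dma-buf fd comes from, one common
source is the dma-heaps interface. Below is a minimal user space sketch,
assuming the system heap is exposed at /dev/dma_heap/system; it is an
illustration only, not part of this series:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dma-heap.h>

/* Allocate 'len' bytes from the system dma-heap; returns a dma-buf fd
 * or -1 on error. */
static int alloc_dmabuf_fd(size_t len)
{
        struct dma_heap_allocation_data data;
        int heap_fd, ret;

        heap_fd = open("/dev/dma_heap/system", O_RDWR);
        if (heap_fd < 0)
                return -1;

        memset(&data, 0, sizeof(data));
        data.len = len;
        data.fd_flags = O_RDWR | O_CLOEXEC;

        ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
        close(heap_fd);
        return ret < 0 ? -1 : (int)data.fd;
}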

v3:
* Rebase to the latest linux-rdma 'for-next' branch (5.15.0-rc6+)
* Fix to use dma-buf-map helpers
v2: https://www.spinics.net/lists/linux-rdma/msg105928.html
* Rebase to the latest linux-rdma 'for-next' branch (5.15.0-rc1+)
* Instead of using a dummy dma_device to attach the dma-buf, just store
the dma-buf for use by software RDMA drivers
* Use dma-buf vmap() interface
* Confirm that the rdma-core tests pass
v1: https://www.spinics.net/lists/linux-rdma/msg105376.html
* The initial patch set
* Use ib_device as dma_device.
* Use dma-buf dynamic attach interface
* Add dma-buf support to rxe device

Shunsuke Mie (2):
RDMA/umem: Change for RDMA devices that have no DMA device
RDMA/rxe: Add dma-buf support

 drivers/infiniband/core/umem_dmabuf.c |  20 ++++-
 drivers/infiniband/sw/rxe/rxe_loc.h   |   2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 113 ++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.c |  34 ++++++++
 include/rdma/ib_umem.h                |   1 +
 5 files changed, 166 insertions(+), 4 deletions(-)

--
2.17.1


2021-10-29 07:04:56

by Shunsuke Mie

Subject: [RFC PATCH v3 1/2] RDMA/umem: Change for RDMA devices that have no DMA device

The current implementation requires a DMA device for an RDMA driver to
use dma-buf memory as an RDMA buffer. However, software RDMA drivers
have no DMA device and copy RDMA data using the CPU instead of hardware.

This patch changes struct ib_umem_dmabuf to hold the dma-buf itself.
This allows software RDMA drivers to map dma-buf memory for CPU access.
(A sketch of the CPU mapping interface follows the patch.)

Signed-off-by: Shunsuke Mie <[email protected]>
---
 drivers/infiniband/core/umem_dmabuf.c | 20 ++++++++++++++++----
 include/rdma/ib_umem.h                |  1 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index e824baf4640d..ebbb0a259fd4 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -117,9 +117,6 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
if (check_add_overflow(offset, (unsigned long)size, &end))
return ret;

- if (unlikely(!ops || !ops->move_notify))
- return ret;
-
dmabuf = dma_buf_get(fd);
if (IS_ERR(dmabuf))
return ERR_CAST(dmabuf);
@@ -133,6 +130,8 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
goto out_release_dmabuf;
}

+ umem_dmabuf->dmabuf = dmabuf;
+
umem = &umem_dmabuf->umem;
umem->ibdev = device;
umem->length = size;
@@ -143,6 +142,13 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
if (!ib_umem_num_pages(umem))
goto out_free_umem;

+ /* Software RDMA drivers have no DMA device. Just get the dmabuf from the fd */
+ if (!device->dma_device)
+ goto done;
+
+ if (unlikely(!ops || !ops->move_notify))
+ goto out_free_umem;
+
umem_dmabuf->attach = dma_buf_dynamic_attach(
dmabuf,
device->dma_device,
@@ -152,6 +158,7 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
ret = ERR_CAST(umem_dmabuf->attach);
goto out_free_umem;
}
+done:
return umem_dmabuf;

out_free_umem:
@@ -165,13 +172,18 @@ EXPORT_SYMBOL(ib_umem_dmabuf_get);

void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
{
- struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+ struct dma_buf *dmabuf = umem_dmabuf->dmabuf;
+
+ if (!umem_dmabuf->attach)
+ goto free_dmabuf;

dma_resv_lock(dmabuf->resv, NULL);
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
dma_resv_unlock(dmabuf->resv);

dma_buf_detach(dmabuf, umem_dmabuf->attach);
+
+free_dmabuf:
dma_buf_put(dmabuf);
kfree(umem_dmabuf);
}
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5ae9dff74dac..11c0cf7e0dd8 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -32,6 +32,7 @@ struct ib_umem {
struct ib_umem_dmabuf {
struct ib_umem umem;
struct dma_buf_attachment *attach;
+ struct dma_buf *dmabuf;
struct sg_table *sgt;
struct scatterlist *first_sg;
struct scatterlist *last_sg;
--
2.17.1
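
For context, the CPU mapping interface that this change lets software
drivers rely on looks roughly as follows. This is a minimal sketch
against the 5.15-era dma-buf-map helpers; the function name is
hypothetical and this is not code from the series:

#include <linux/dma-buf.h>
#include <linux/dma-buf-map.h>

/* Sketch: map a dma-buf for CPU access, rejecting I/O memory that a
 * software driver cannot dereference like normal kernel memory. */
static int sketch_cpu_map(struct dma_buf *dmabuf, void **vaddr)
{
        struct dma_buf_map map;
        int err;

        err = dma_buf_vmap(dmabuf, &map);
        if (err)
                return err;

        if (map.is_iomem) {
                dma_buf_vunmap(dmabuf, &map);
                return -EINVAL;
        }

        *vaddr = map.vaddr;
        return 0;
}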

2021-10-29 07:08:00

by Shunsuke Mie

Subject: [RFC PATCH v3 2/2] RDMA/rxe: Add dma-buf support

Implement the ib device operation 'reg_user_mr_dmabuf'. Generate a
rxe_map from the memory space linked to the passed dma-buf.
(A user space usage sketch follows the patch.)

Signed-off-by: Shunsuke Mie <[email protected]>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |   2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 113 ++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.c |  34 ++++++++
 3 files changed, 149 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 1ca43b859d80..8bc19ea1a376 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -75,6 +75,8 @@ u8 rxe_get_next_key(u32 last_key);
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr);
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+ u64 iova, int access, struct rxe_mr *mr);
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 53271df10e47..ba1282b99772 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -4,6 +4,8 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/

+#include <linux/dma-buf.h>
+#include <linux/dma-buf-map.h>
#include "rxe.h"
#include "rxe_loc.h"

@@ -245,6 +247,114 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
return err;
}

+static int rxe_map_dmabuf_mr(struct rxe_mr *mr,
+ struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct rxe_map_set *set;
+ struct rxe_phys_buf *buf = NULL;
+ struct rxe_map **map;
+ void *vaddr, *vaddr_end;
+ int num_buf = 0;
+ int err;
+ size_t remain;
+ struct dma_buf_map dmabuf_map;
+
+ err = dma_buf_vmap(umem_dmabuf->dmabuf, &dmabuf_map);
+ if (err || dmabuf_map.is_iomem)
+ goto err_out;
+
+ set = mr->cur_map_set;
+ set->page_shift = PAGE_SHIFT;
+ set->page_mask = PAGE_SIZE - 1;
+
+ map = set->map;
+ buf = map[0]->buf;
+
+ vaddr = dmabuf_map.vaddr;
+ vaddr_end = vaddr + umem_dmabuf->dmabuf->size;
+ remain = umem_dmabuf->dmabuf->size;
+
+ for (; remain; vaddr += PAGE_SIZE) {
+ if (num_buf >= RXE_BUF_PER_MAP) {
+ map++;
+ buf = map[0]->buf;
+ num_buf = 0;
+ }
+
+ buf->addr = (uintptr_t)vaddr;
+ if (remain >= PAGE_SIZE)
+ buf->size = PAGE_SIZE;
+ else
+ buf->size = remain;
+ remain -= buf->size;
+
+ num_buf++;
+ buf++;
+ }
+
+ return 0;
+
+err_out:
+ return err;
+}
+
+static void rxe_unmap_dmabuf_mr(struct rxe_mr *mr)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(
+ (void *)mr->cur_map_set->map[0]->buf->addr);
+
+ dma_buf_vunmap(umem_dmabuf->dmabuf, &map);
+}
+
+int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 length,
+ u64 iova, int access, struct rxe_mr *mr)
+{
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct rxe_map_set *set;
+ int err;
+
+ umem_dmabuf = ib_umem_dmabuf_get(pd->ibpd.device, start, length, fd,
+ access, NULL);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ goto err_out;
+ }
+
+ rxe_mr_init(access, mr);
+
+ err = rxe_mr_alloc(mr, ib_umem_num_pages(&umem_dmabuf->umem), 0);
+ if (err) {
+ pr_warn("%s: Unable to allocate memory for map\n", __func__);
+ goto err_release_umem;
+ }
+
+ mr->ibmr.pd = &pd->ibpd;
+ mr->umem = &umem_dmabuf->umem;
+ mr->access = access;
+ mr->state = RXE_MR_STATE_VALID;
+ mr->type = IB_MR_TYPE_USER;
+
+ set = mr->cur_map_set;
+ set->length = length;
+ set->iova = iova;
+ set->va = start;
+ set->offset = ib_umem_offset(mr->umem);
+
+ err = rxe_map_dmabuf_mr(mr, umem_dmabuf);
+ if (err)
+ goto err_free_map_set;
+
+ return 0;
+
+err_free_map_set:
+ rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+err_release_umem:
+ ib_umem_release(&umem_dmabuf->umem);
+err_out:
+ return err;
+}
+
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
int err;
@@ -703,6 +813,9 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);

+ if (mr->umem && mr->umem->is_dmabuf)
+ rxe_unmap_dmabuf_mr(mr);
+
ib_umem_release(mr->umem);

if (mr->cur_map_set)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 0aa0d7e52773..dc7d27b3cb90 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -940,6 +940,39 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
return ERR_PTR(err);
}

+static struct ib_mr *rxe_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 iova, int fd,
+ int access, struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_mr *mr;
+
+ mr = rxe_alloc(&rxe->mr_pool);
+ if (!mr) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ rxe_add_index(mr);
+
+ rxe_add_ref(pd);
+
+ err = rxe_mr_dmabuf_init_user(pd, fd, start, length, iova, access, mr);
+ if (err)
+ goto err3;
+
+ return &mr->ibmr;
+
+err3:
+ rxe_drop_ref(pd);
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+err2:
+ return ERR_PTR(err);
+}
+
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
@@ -1105,6 +1138,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.query_qp = rxe_query_qp,
.query_srq = rxe_query_srq,
.reg_user_mr = rxe_reg_user_mr,
+ .reg_user_mr_dmabuf = rxe_reg_user_mr_dmabuf,
.req_notify_cq = rxe_req_notify_cq,
.resize_cq = rxe_resize_cq,

--
2.17.1
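
Together with the rdma-core change mentioned in the cover letter, user
space would exercise this path roughly as follows. This is a sketch
assuming rdma-core's ibv_reg_dmabuf_mr() and a dma-buf fd such as the
one from the heap example above; the helper name is hypothetical:

#include <stddef.h>
#include <infiniband/verbs.h>

/* Sketch: register a dma-buf fd as a memory region on an rxe device. */
static struct ibv_mr *reg_dmabuf(struct ibv_pd *pd, int dmabuf_fd,
                                 size_t len)
{
        /* offset 0 within the dma-buf, iova 0 */
        return ibv_reg_dmabuf_mr(pd, 0, len, 0, dmabuf_fd,
                                 IBV_ACCESS_LOCAL_WRITE |
                                 IBV_ACCESS_REMOTE_READ |
                                 IBV_ACCESS_REMOTE_WRITE);
}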