2020-09-30 11:55:06

by Alexandre Bailon

[permalink] [raw]
Subject: [RFC PATCH 0/4] Add a RPMsg driver to support AI Processing Unit (APU)

This adds an RPMsg driver that implements communication between the CPU and an
APU.
It uses VirtIO buffers to exchange messages, but for sharing data it uses
a dmabuf, mapped so that it is shared between the CPU (userspace) and the APU.
The driver is relatively generic, and should work with any SoC implementing
a hardware accelerator for AI, provided it supports remoteproc and VirtIO.

For people interested in the firmware or userspace library,
the sources are available here:
https://github.com/BayLibre/open-amp/tree/v2020.01-mtk/apps/examples/apu

Alexandre Bailon (3):
Add a RPMSG driver for the APU in the mt8183
rpmsg: apu_rpmsg: update the way to store IOMMU mapping
rpmsg: apu_rpmsg: Add an IOCTL to request IOMMU mapping

Julien STEPHAN (1):
rpmsg: apu_rpmsg: Add support for async apu request

drivers/rpmsg/Kconfig | 9 +
drivers/rpmsg/Makefile | 1 +
drivers/rpmsg/apu_rpmsg.c | 752 +++++++++++++++++++++++++++++++++
drivers/rpmsg/apu_rpmsg.h | 52 +++
include/uapi/linux/apu_rpmsg.h | 47 +++
5 files changed, 861 insertions(+)
create mode 100644 drivers/rpmsg/apu_rpmsg.c
create mode 100644 drivers/rpmsg/apu_rpmsg.h
create mode 100644 include/uapi/linux/apu_rpmsg.h

--
2.26.2


2020-09-30 11:55:32

by Alexandre Bailon

[permalink] [raw]
Subject: [RFC PATCH 3/4] rpmsg: apu_rpmsg: update the way to store IOMMU mapping

In order to reduce the number of memory mapping operations, we are going to
add an IOCTL to request a mapping.
To make it easier to add this new operation, use 2 lists to store the
mappings, one for the request and one for the device.

Signed-off-by: Alexandre Bailon <[email protected]>
---
drivers/rpmsg/apu_rpmsg.c | 104 +++++++++++++++++++++++++-------------
1 file changed, 70 insertions(+), 34 deletions(-)

diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c
index e14597c467d7..343bd08a859a 100644
--- a/drivers/rpmsg/apu_rpmsg.c
+++ b/drivers/rpmsg/apu_rpmsg.c
@@ -38,12 +38,14 @@ struct rpmsg_apu {
u8 available_response;
spinlock_t ctx_lock;
struct list_head requests;
+
+ struct list_head buffers;
};

struct rpmsg_request {
u8 ready;
struct list_head node;
- struct apu_buffer *buffer;
+ struct list_head buffers;
void *req;
};

@@ -53,6 +55,11 @@ struct apu_buffer {
struct dma_buf_attachment *attachment;
struct sg_table *sg_table;
u32 iova;
+
+ struct rpmsg_apu *apu;
+ struct list_head node;
+ struct list_head req_node;
+ struct kref refcount;
};

/*
@@ -106,23 +113,46 @@ static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
return 0;
}

-static int apu_device_memory_map(struct rpmsg_apu *apu,
- struct apu_buffer *buffer)
+static struct apu_buffer *apu_device_memory_map(struct rpmsg_apu *apu,
+ uint32_t fd, struct rpmsg_request *rpmsg_req)
{
struct rpmsg_device *rpdev = apu->rpdev;
+ struct apu_buffer *buffer;
phys_addr_t phys;
int total_buf_space;
int iova_pfn;
int ret;

- if (!buffer->fd)
- return 0;
+ if (!fd)
+ return NULL;
+
+ list_for_each_entry(buffer, &apu->buffers, node) {
+ if (buffer->fd == fd) {
+ kref_get(&buffer->refcount);
+ if (rpmsg_req)
+ list_add(&buffer->req_node,
+ &rpmsg_req->buffers);
+
+ return buffer;
+ }
+ }
+
+ buffer = kmalloc(sizeof(*buffer), GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&buffer->refcount);
+ buffer->fd = fd;
+ buffer->apu = apu;
+ INIT_LIST_HEAD(&buffer->req_node);
+ INIT_LIST_HEAD(&buffer->node);

buffer->dma_buf = dma_buf_get(buffer->fd);
if (IS_ERR(buffer->dma_buf)) {
dev_err(&rpdev->dev, "Failed to get dma_buf from fd: %ld\n",
PTR_ERR(buffer->dma_buf));
- return PTR_ERR(buffer->dma_buf);
+ ret = PTR_ERR(buffer->dma_buf);
+ goto err_free_buffer;
}

buffer->attachment = dma_buf_attach(buffer->dma_buf, &rpdev->dev);
@@ -158,7 +188,9 @@ static int apu_device_memory_map(struct rpmsg_apu *apu,
goto err_free_iova;
}

- return 0;
+ list_add(&buffer->node, &apu->buffers);
+
+ return buffer;

err_free_iova:
free_iova(apu->iovad, iova_pfn);
@@ -170,13 +202,17 @@ static int apu_device_memory_map(struct rpmsg_apu *apu,
dma_buf_detach(buffer->dma_buf, buffer->attachment);
err_dma_buf_put:
dma_buf_put(buffer->dma_buf);
+err_free_buffer:
+ kfree(buffer);

- return ret;
+ return ERR_PTR(ret);
}

-static void apu_device_memory_unmap(struct rpmsg_apu *apu,
- struct apu_buffer *buffer)
+static void apu_device_memory_unmap(struct kref *ref)
{
+ struct apu_buffer *buffer = container_of(ref, struct apu_buffer,
+ refcount);
+ struct rpmsg_apu *apu = buffer->apu;
int total_buf_space;

if (!buffer->fd)
@@ -190,6 +226,8 @@ static void apu_device_memory_unmap(struct rpmsg_apu *apu,
DMA_BIDIRECTIONAL);
dma_buf_detach(buffer->dma_buf, buffer->attachment);
dma_buf_put(buffer->dma_buf);
+ list_del(&buffer->node);
+ kfree(buffer);
}

static int apu_send_request(struct rpmsg_apu *apu,
@@ -198,7 +236,7 @@ static int apu_send_request(struct rpmsg_apu *apu,
int ret;
struct rpmsg_device *rpdev = apu->rpdev;
struct apu_dev_request *dev_req;
- struct apu_buffer *buffer;
+ struct apu_buffer *buffer, *tmp;
struct rpmsg_request *rpmsg_req;
unsigned long flags;

@@ -222,14 +260,21 @@ static int apu_send_request(struct rpmsg_apu *apu,
dev_req_buffer_size = (u32 *)(dev_req_da + dev_req->count);
memcpy(dev_req->data, req->data, req->size_in);

- buffer = kmalloc_array(req->count, sizeof(*buffer), GFP_KERNEL);
+ rpmsg_req = kzalloc(sizeof(*rpmsg_req), GFP_KERNEL);
+ if (!rpmsg_req)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rpmsg_req->buffers);
for (i = 0; i < req->count; i++) {
- buffer[i].fd = fd[i];
- ret = apu_device_memory_map(apu, &buffer[i]);
- if (ret)
+ buffer = apu_device_memory_map(apu, fd[i], rpmsg_req);
+ if (IS_ERR(buffer)) {
+ ret = PTR_ERR(buffer);
goto err_free_memory;
- dev_req_da[i] = buffer[i].iova;
+ }
+
+ dev_req_da[i] = buffer->iova;
dev_req_buffer_size[i] = buffer_size[i];
+ list_add(&buffer->req_node, &rpmsg_req->buffers);
}

ret = ida_simple_get(&req_ida, 0, 0xffff, GFP_KERNEL);
@@ -238,15 +283,8 @@ static int apu_send_request(struct rpmsg_apu *apu,

dev_req->id = ret;

- rpmsg_req = kzalloc(sizeof(*rpmsg_req), GFP_KERNEL);
- if (!rpmsg_req) {
- ret = -ENOMEM;
- goto err_ida_remove;
- }
-
req->id = dev_req->id;
rpmsg_req->req = req;
- rpmsg_req->buffer = buffer;
spin_lock_irqsave(&apu->ctx_lock, flags);
list_add(&rpmsg_req->node, &apu->requests);
spin_unlock_irqrestore(&apu->ctx_lock, flags);
@@ -261,15 +299,12 @@ static int apu_send_request(struct rpmsg_apu *apu,

err:
list_del(&rpmsg_req->node);
- kfree(rpmsg_req);
kfree(req);
-err_ida_remove:
ida_simple_remove(&req_ida, dev_req->id);
err_free_memory:
- for (i--; i >= 0; i--)
- apu_device_memory_unmap(apu, &buffer[i]);
-
- kfree(buffer);
+ list_for_each_entry_safe(buffer, tmp, &rpmsg_req->buffers, req_node)
+ kref_put(&buffer->refcount, apu_device_memory_unmap);
+ kfree(rpmsg_req);
kfree(dev_req);

return ret;
@@ -296,12 +331,12 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
struct rpmsg_apu *apu = fp->private_data;
struct apu_request apu_req;
struct apu_request *apu_req_full;
+ struct apu_buffer *buffer, *tmp;
void __user *argp = (void __user *)arg;
int len;
int ret;
unsigned long flags;
struct rpmsg_request *rpmsg_req;
- int i;

ret = 0;

@@ -370,11 +405,11 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
ret = -EFAULT;
apu->available_response--;
ida_simple_remove(&req_ida, req->id);
- for (i = 0; i < req->count ; i++)
- apu_device_memory_unmap(apu,
- &rpmsg_req->buffer[i]);
+ list_for_each_entry_safe(buffer, tmp, &rpmsg_req->buffers, req_node) {
+ kref_put(&buffer->refcount, apu_device_memory_unmap);
+ list_del(&buffer->req_node);
+ }
list_del(&rpmsg_req->node);
- kfree(rpmsg_req->buffer);
kfree(rpmsg_req->req);
kfree(rpmsg_req);
break;
@@ -554,6 +589,7 @@ static int apu_rpmsg_probe(struct rpmsg_device *rpdev)
if (!apu)
return -ENOMEM;
apu->rpdev = rpdev;
+ INIT_LIST_HEAD(&apu->buffers);

apu->rproc = apu_get_rproc(rpdev);
if (IS_ERR_OR_NULL(apu->rproc))
--
2.26.2

2020-09-30 11:56:29

by Alexandre Bailon

[permalink] [raw]
Subject: [RFC PATCH 2/4] rpmsg: apu_rpmsg: Add support for async apu request

From: Julien STEPHAN <[email protected]>

In order to improve performance and flexibility,
add support for async requests.

Signed-off-by: Julien STEPHAN <[email protected]>
Signed-off-by: Alexandre Bailon <[email protected]>
---
drivers/rpmsg/apu_rpmsg.c | 208 ++++++++++++++++++++++-----------
include/uapi/linux/apu_rpmsg.h | 6 +-
2 files changed, 144 insertions(+), 70 deletions(-)

diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c
index 5131b8b8e1f2..e14597c467d7 100644
--- a/drivers/rpmsg/apu_rpmsg.c
+++ b/drivers/rpmsg/apu_rpmsg.c
@@ -34,11 +34,16 @@ struct rpmsg_apu {
struct iommu_domain *domain;
struct iova_domain *iovad;
int iova_limit_pfn;
+ wait_queue_head_t waitqueue;
+ u8 available_response;
+ spinlock_t ctx_lock;
+ struct list_head requests;
};

struct rpmsg_request {
- struct completion completion;
+ u8 ready;
struct list_head node;
+ struct apu_buffer *buffer;
void *req;
};

@@ -68,25 +73,35 @@ static dev_t rpmsg_major;
static DEFINE_IDA(rpmsg_ctrl_ida);
static DEFINE_IDA(rpmsg_minor_ida);
static DEFINE_IDA(req_ida);
-static LIST_HEAD(requests);
static struct apu_iova_domain *apu_iovad;

-static int apu_rpmsg_callback(struct rpmsg_device *dev, void *data, int count,
+
+static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
void *priv, u32 addr)
{
+ struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
struct rpmsg_request *rpmsg_req;
struct apu_dev_request *hdr = data;
+ unsigned long flags;

- list_for_each_entry(rpmsg_req, &requests, node) {
- struct apu_dev_request *tmp_hdr = rpmsg_req->req;
+ spin_lock_irqsave(&apu->ctx_lock, flags);
+ list_for_each_entry(rpmsg_req, &apu->requests, node) {
+ struct apu_request *tmp_hdr = rpmsg_req->req;

if (hdr->id == tmp_hdr->id) {
- memcpy(rpmsg_req->req, data, count);
- complete(&rpmsg_req->completion);
-
- return 0;
+ rpmsg_req->ready = 1;
+ apu->available_response++;
+ tmp_hdr->result = hdr->result;
+ tmp_hdr->size_in = hdr->size_in;
+ tmp_hdr->size_out = hdr->size_out;
+ memcpy(tmp_hdr->data, hdr->data,
+ hdr->size_in+hdr->size_out);
+
+ wake_up_interruptible(&apu->waitqueue);
+ break;
}
}
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);

return 0;
}
@@ -177,48 +192,6 @@ static void apu_device_memory_unmap(struct rpmsg_apu *apu,
dma_buf_put(buffer->dma_buf);
}

-static int _apu_send_request(struct rpmsg_apu *apu,
- struct rpmsg_device *rpdev,
- struct apu_dev_request *req, int len)
-{
-
- struct rpmsg_request *rpmsg_req;
- int ret = 0;
-
- req->id = ida_simple_get(&req_ida, 0, 0xffff, GFP_KERNEL);
- if (req->id < 0)
- return ret;
-
- rpmsg_req = kzalloc(sizeof(*rpmsg_req), GFP_KERNEL);
- if (!rpmsg_req)
- return -ENOMEM;
-
- rpmsg_req->req = req;
- init_completion(&rpmsg_req->completion);
- list_add(&rpmsg_req->node, &requests);
-
- ret = rpmsg_send(rpdev->ept, req, len);
- if (ret)
- goto free_req;
-
- /* be careful with race here between timeout and callback*/
- ret = wait_for_completion_timeout(&rpmsg_req->completion,
- msecs_to_jiffies(1000));
- if (!ret)
- ret = -ETIMEDOUT;
- else
- ret = 0;
-
- ida_simple_remove(&req_ida, req->id);
-
-free_req:
-
- list_del(&rpmsg_req->node);
- kfree(rpmsg_req);
-
- return ret;
-}
-
static int apu_send_request(struct rpmsg_apu *apu,
struct apu_request *req)
{
@@ -226,6 +199,8 @@ static int apu_send_request(struct rpmsg_apu *apu,
struct rpmsg_device *rpdev = apu->rpdev;
struct apu_dev_request *dev_req;
struct apu_buffer *buffer;
+ struct rpmsg_request *rpmsg_req;
+ unsigned long flags;

int size = req->size_in + req->size_out +
sizeof(u32) * req->count * 2 + sizeof(*dev_req);
@@ -257,24 +232,63 @@ static int apu_send_request(struct rpmsg_apu *apu,
dev_req_buffer_size[i] = buffer_size[i];
}

- ret = _apu_send_request(apu, rpdev, dev_req, size);
+ ret = ida_simple_get(&req_ida, 0, 0xffff, GFP_KERNEL);
+ if (ret < 0)
+ goto err_free_memory;
+
+ dev_req->id = ret;
+
+ rpmsg_req = kzalloc(sizeof(*rpmsg_req), GFP_KERNEL);
+ if (!rpmsg_req) {
+ ret = -ENOMEM;
+ goto err_ida_remove;
+ }

+ req->id = dev_req->id;
+ rpmsg_req->req = req;
+ rpmsg_req->buffer = buffer;
+ spin_lock_irqsave(&apu->ctx_lock, flags);
+ list_add(&rpmsg_req->node, &apu->requests);
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);
+
+ ret = rpmsg_send(rpdev->ept, dev_req, size);
+ if (ret < 0)
+ goto err;
+
+ kfree(dev_req);
+
+ return req->id;
+
+err:
+ list_del(&rpmsg_req->node);
+ kfree(rpmsg_req);
+ kfree(req);
+err_ida_remove:
+ ida_simple_remove(&req_ida, dev_req->id);
err_free_memory:
for (i--; i >= 0; i--)
apu_device_memory_unmap(apu, &buffer[i]);

- req->result = dev_req->result;
- req->size_in = dev_req->size_in;
- req->size_out = dev_req->size_out;
- memcpy(req->data, dev_req->data, dev_req->size_in + dev_req->size_out +
- sizeof(u32) * req->count);
-
kfree(buffer);
kfree(dev_req);

return ret;
}

+unsigned int rpmsg_eptdev_poll(struct file *fp, struct poll_table_struct *wait)
+{
+ struct rpmsg_apu *apu = fp->private_data;
+ unsigned long flags;
+
+ poll_wait(fp, &apu->waitqueue, wait);
+ spin_lock_irqsave(&apu->ctx_lock, flags);
+ if (apu->available_response) {
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);
+ return POLLIN;
+ }
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);
+ return 0;
+}

static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg)
@@ -285,6 +299,11 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
void __user *argp = (void __user *)arg;
int len;
int ret;
+ unsigned long flags;
+ struct rpmsg_request *rpmsg_req;
+ int i;
+
+ ret = 0;

switch (cmd) {
case APU_SEND_REQ_IOCTL:
@@ -306,24 +325,69 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
}

ret = apu_send_request(apu, apu_req_full);
- if (ret) {
- kfree(apu_req_full);
- return ret;
+
+ break;
+ case APU_GET_NEXT_AVAILABLE_IOCTL:
+ ret = -ENOMSG;
+ spin_lock_irqsave(&apu->ctx_lock, flags);
+ list_for_each_entry(rpmsg_req, &apu->requests, node) {
+ if (rpmsg_req->ready == 1) {
+ struct apu_request *req =
+ rpmsg_req->req;
+
+ ret = 0;
+ if (copy_to_user(argp, &req->id, sizeof(__u16)))
+ ret = -EFAULT;
+ break;
+ }
}
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);
+ break;
+ case APU_GET_RESP:
+ /* Get the header */
+ if (!argp)
+ return -EINVAL;

- if (copy_to_user(argp, apu_req_full, sizeof(apu_req) +
- sizeof(u32) * apu_req_full->count +
- apu_req_full->size_in + apu_req_full->size_out))
- ret = -EFAULT;
+ if (copy_from_user(&apu_req, argp,
+ sizeof(apu_req)))
+ return -EFAULT;

- kfree(apu_req_full);
- return ret;
+ spin_lock_irqsave(&apu->ctx_lock, flags);
+ list_for_each_entry(rpmsg_req, &apu->requests, node) {
+ struct apu_request *req = rpmsg_req->req;
+
+ if ((apu_req.id == req->id) &&
+ (rpmsg_req->ready == 1)) {
+ int req_len = sizeof(struct apu_request) +
+ req->size_in + req->size_out +
+ req->count * sizeof(u32)*2;
+ int apu_req_len = sizeof(struct apu_request) +
+ req->size_in + req->size_out +
+ req->count * sizeof(u32)*2;
+
+ len = min(req_len, apu_req_len);
+ if (copy_to_user(argp, req, len))
+ ret = -EFAULT;
+ apu->available_response--;
+ ida_simple_remove(&req_ida, req->id);
+ for (i = 0; i < req->count ; i++)
+ apu_device_memory_unmap(apu,
+ &rpmsg_req->buffer[i]);
+ list_del(&rpmsg_req->node);
+ kfree(rpmsg_req->buffer);
+ kfree(rpmsg_req->req);
+ kfree(rpmsg_req);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&apu->ctx_lock, flags);

+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}

- return 0;
+ return ret;
}

static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
@@ -351,6 +415,7 @@ static const struct file_operations rpmsg_eptdev_fops = {
.release = rpmsg_eptdev_release,
.unlocked_ioctl = rpmsg_eptdev_ioctl,
.compat_ioctl = rpmsg_eptdev_ioctl,
+ .poll = rpmsg_eptdev_poll,
};

static void iova_domain_release(struct kref *ref)
@@ -512,6 +577,11 @@ static int apu_rpmsg_probe(struct rpmsg_device *rpdev)
dev->id = ret;
dev_set_name(&apu->dev, "apu%d", ret);

+ init_waitqueue_head(&apu->waitqueue);
+ spin_lock_init(&apu->ctx_lock);
+ apu->available_response = 0;
+ INIT_LIST_HEAD(&apu->requests);
+
ret = cdev_add(&apu->cdev, dev->devt, 1);
if (ret)
goto free_ctrl_ida;
diff --git a/include/uapi/linux/apu_rpmsg.h b/include/uapi/linux/apu_rpmsg.h
index 81c9e4af9a94..f61207520254 100644
--- a/include/uapi/linux/apu_rpmsg.h
+++ b/include/uapi/linux/apu_rpmsg.h
@@ -21,6 +21,7 @@
* by the APU.
*/
struct apu_request {
+ __u16 id;
__u16 cmd;
__u16 result;
__u16 size_in;
@@ -31,6 +32,9 @@ struct apu_request {
};

/* Send synchronous request to an APU */
-#define APU_SEND_REQ_IOCTL _IOWR(0xb7, 0x2, struct apu_request)
+
+#define APU_SEND_REQ_IOCTL _IOW(0xb7, 0x2, struct apu_request)
+#define APU_GET_NEXT_AVAILABLE_IOCTL _IOR(0xb7, 0x3, __u16)
+#define APU_GET_RESP _IOWR(0xb7, 0x4, struct apu_request)

#endif
--
2.26.2

2020-09-30 11:58:08

by Alexandre Bailon

[permalink] [raw]
Subject: [RFC PATCH 4/4] rpmsg: apu_rpmsg: Add an IOCTL to request IOMMU mapping

Currently, the kernel automatically performs the IOMMU memory mapping.
But we want to be able to do it manually for two reasons:
- to reduce the overhead of each APU operation
- to get the device address and use it as input for an operation
This adds 2 IOCTLs to manually IOMMU map and unmap memory.

Signed-off-by: Alexandre Bailon <[email protected]>
---
drivers/rpmsg/apu_rpmsg.c | 52 ++++++++++++++++++++++++++++++----
include/uapi/linux/apu_rpmsg.h | 7 +++++
2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c
index 343bd08a859a..4c064feddf5a 100644
--- a/drivers/rpmsg/apu_rpmsg.c
+++ b/drivers/rpmsg/apu_rpmsg.c
@@ -114,7 +114,7 @@ static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
}

static struct apu_buffer *apu_device_memory_map(struct rpmsg_apu *apu,
- uint32_t fd, struct rpmsg_request *rpmsg_req)
+ uint32_t fd)
{
struct rpmsg_device *rpdev = apu->rpdev;
struct apu_buffer *buffer;
@@ -129,10 +129,6 @@ static struct apu_buffer *apu_device_memory_map(struct rpmsg_apu *apu,
list_for_each_entry(buffer, &apu->buffers, node) {
if (buffer->fd == fd) {
kref_get(&buffer->refcount);
- if (rpmsg_req)
- list_add(&buffer->req_node,
- &rpmsg_req->buffers);
-
return buffer;
}
}
@@ -230,6 +226,44 @@ static void apu_device_memory_unmap(struct kref *ref)
kfree(buffer);
}

+static int apu_iommu_mmap_ioctl(struct rpmsg_apu *apu, void __user *argp)
+{
+ struct apu_iommu_mmap apu_iommu_mmap;
+ struct apu_buffer *buffer;
+ int ret;
+
+ if (copy_from_user(&apu_iommu_mmap, argp, sizeof(apu_iommu_mmap)))
+ return -EFAULT;
+
+ buffer = apu_device_memory_map(apu, apu_iommu_mmap.fd);
+ if (!buffer)
+ return -ENOMEM;
+
+ apu_iommu_mmap.da = buffer->iova;
+ if (copy_to_user(argp, &apu_iommu_mmap, sizeof(apu_iommu_mmap)))
+ ret = -EFAULT;
+
+ return 0;
+}
+
+static int apu_iommu_munmap_ioctl(struct rpmsg_apu *apu, void __user *argp)
+{
+ u32 fd;
+ struct apu_buffer *buffer, *tmp;
+
+ if (copy_from_user(&fd, argp, sizeof(fd)))
+ return -EFAULT;
+
+ list_for_each_entry_safe(buffer, tmp, &apu->buffers, node) {
+ if (buffer->fd == fd) {
+ kref_put(&buffer->refcount, apu_device_memory_unmap);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
static int apu_send_request(struct rpmsg_apu *apu,
struct apu_request *req)
{
@@ -266,7 +300,7 @@ static int apu_send_request(struct rpmsg_apu *apu,

INIT_LIST_HEAD(&rpmsg_req->buffers);
for (i = 0; i < req->count; i++) {
- buffer = apu_device_memory_map(apu, fd[i], rpmsg_req);
+ buffer = apu_device_memory_map(apu, fd[i]);
if (IS_ERR(buffer)) {
ret = PTR_ERR(buffer);
goto err_free_memory;
@@ -417,6 +451,12 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
}
spin_unlock_irqrestore(&apu->ctx_lock, flags);

+ break;
+ case APU_IOMMU_MMAP:
+ ret = apu_iommu_mmap_ioctl(apu, argp);
+ break;
+ case APU_IOMMU_MUNMAP:
+ ret = apu_iommu_munmap_ioctl(apu, argp);
break;
default:
ret = -EINVAL;
diff --git a/include/uapi/linux/apu_rpmsg.h b/include/uapi/linux/apu_rpmsg.h
index f61207520254..e9b841dcbcb4 100644
--- a/include/uapi/linux/apu_rpmsg.h
+++ b/include/uapi/linux/apu_rpmsg.h
@@ -31,10 +31,17 @@ struct apu_request {
__u8 data[0];
};

+struct apu_iommu_mmap {
+ __u32 fd;
+ __u32 da;
+};
+
/* Send synchronous request to an APU */

#define APU_SEND_REQ_IOCTL _IOW(0xb7, 0x2, struct apu_request)
#define APU_GET_NEXT_AVAILABLE_IOCTL _IOR(0xb7, 0x3, __u16)
#define APU_GET_RESP _IOWR(0xb7, 0x4, struct apu_request)
+#define APU_IOMMU_MMAP _IOWR(0xb7, 0x5, struct apu_iommu_mmap)
+#define APU_IOMMU_MUNMAP _IOWR(0xb7, 0x6, __u32)

#endif
--
2.26.2

2020-10-01 08:52:28

by Daniel Vetter

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] Add a RPMsg driver to support AI Processing Unit (APU)

On Wed, Sep 30, 2020 at 01:53:46PM +0200, Alexandre Bailon wrote:
> This adds a RPMsg driver that implements communication between the CPU and an
> APU.
> This uses VirtIO buffer to exchange messages but for sharing data, this uses
> a dmabuf, mapped to be shared between CPU (userspace) and APU.
> The driver is relatively generic, and should work with any SoC implementing
> hardware accelerator for AI if they use support remoteproc and VirtIO.
>
> For the people interested by the firmware or userspace library,
> the sources are available here:
> https://github.com/BayLibre/open-amp/tree/v2020.01-mtk/apps/examples/apu

Since this has open userspace (from a very cursory look), and smells very
much like an acceleration driver, and seems to use dma-buf for memory
management: Why is this not just a drm driver?
-Daniel

>
> Alexandre Bailon (3):
> Add a RPMSG driver for the APU in the mt8183
> rpmsg: apu_rpmsg: update the way to store IOMMU mapping
> rpmsg: apu_rpmsg: Add an IOCTL to request IOMMU mapping
>
> Julien STEPHAN (1):
> rpmsg: apu_rpmsg: Add support for async apu request
>
> drivers/rpmsg/Kconfig | 9 +
> drivers/rpmsg/Makefile | 1 +
> drivers/rpmsg/apu_rpmsg.c | 752 +++++++++++++++++++++++++++++++++
> drivers/rpmsg/apu_rpmsg.h | 52 +++
> include/uapi/linux/apu_rpmsg.h | 47 +++
> 5 files changed, 861 insertions(+)
> create mode 100644 drivers/rpmsg/apu_rpmsg.c
> create mode 100644 drivers/rpmsg/apu_rpmsg.h
> create mode 100644 include/uapi/linux/apu_rpmsg.h
>
> --
> 2.26.2
>
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2020-10-01 17:32:07

by Alexandre Bailon

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] Add a RPMsg driver to support AI Processing Unit (APU)

Hi Daniel,

On 10/1/20 10:48 AM, Daniel Vetter wrote:
> On Wed, Sep 30, 2020 at 01:53:46PM +0200, Alexandre Bailon wrote:
>> This adds a RPMsg driver that implements communication between the CPU and an
>> APU.
>> This uses VirtIO buffer to exchange messages but for sharing data, this uses
>> a dmabuf, mapped to be shared between CPU (userspace) and APU.
>> The driver is relatively generic, and should work with any SoC implementing
>> hardware accelerator for AI if they use support remoteproc and VirtIO.
>>
>> For the people interested by the firmware or userspace library,
>> the sources are available here:
>> https://github.com/BayLibre/open-amp/tree/v2020.01-mtk/apps/examples/apu
> Since this has open userspace (from a very cursory look), and smells very
> much like an acceleration driver, and seems to use dma-buf for memory
> management: Why is this not just a drm driver?

I had never thought of DRM since, for me, it was only an RPMsg driver.
I don't know DRM well. Could you tell me how you would do it so I could
have a look?

Thanks,
Alexandre

> -Daniel
>
>> Alexandre Bailon (3):
>> Add a RPMSG driver for the APU in the mt8183
>> rpmsg: apu_rpmsg: update the way to store IOMMU mapping
>> rpmsg: apu_rpmsg: Add an IOCTL to request IOMMU mapping
>>
>> Julien STEPHAN (1):
>> rpmsg: apu_rpmsg: Add support for async apu request
>>
>> drivers/rpmsg/Kconfig | 9 +
>> drivers/rpmsg/Makefile | 1 +
>> drivers/rpmsg/apu_rpmsg.c | 752 +++++++++++++++++++++++++++++++++
>> drivers/rpmsg/apu_rpmsg.h | 52 +++
>> include/uapi/linux/apu_rpmsg.h | 47 +++
>> 5 files changed, 861 insertions(+)
>> create mode 100644 drivers/rpmsg/apu_rpmsg.c
>> create mode 100644 drivers/rpmsg/apu_rpmsg.h
>> create mode 100644 include/uapi/linux/apu_rpmsg.h
>>
>> --
>> 2.26.2
>>
>> _______________________________________________
>> dri-devel mailing list
>> [email protected]
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

2020-10-02 09:36:36

by Daniel Vetter

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] Add a RPMsg driver to support AI Processing Unit (APU)

On Thu, Oct 01, 2020 at 07:28:27PM +0200, Alexandre Bailon wrote:
> Hi Daniel,
>
> On 10/1/20 10:48 AM, Daniel Vetter wrote:
> > On Wed, Sep 30, 2020 at 01:53:46PM +0200, Alexandre Bailon wrote:
> > > This adds a RPMsg driver that implements communication between the CPU and an
> > > APU.
> > > This uses VirtIO buffer to exchange messages but for sharing data, this uses
> > > a dmabuf, mapped to be shared between CPU (userspace) and APU.
> > > The driver is relatively generic, and should work with any SoC implementing
> > > hardware accelerator for AI if they use support remoteproc and VirtIO.
> > >
> > > For the people interested by the firmware or userspace library,
> > > the sources are available here:
> > > https://github.com/BayLibre/open-amp/tree/v2020.01-mtk/apps/examples/apu
> > Since this has open userspace (from a very cursory look), and smells very
> > much like an acceleration driver, and seems to use dma-buf for memory
> > management: Why is this not just a drm driver?
>
> I have never though to DRM since for me it was only a RPMsg driver.
> I don't know well DRM. Could you tell me how you would do it so I could have
> a look ?

Well internally it would still be an rpmsg driver ... I'm assuming that's
kinda similar to how most gpu drivers sit on top of a pci_device or a
platform_device, it's just a means to get at your "device"?

The part I'm talking about here is the userspace api. You're creating an
entirely new chardev interface, which at least from a quick look seems to
be based on dma-buf buffers and used to submit commands to your device to
do some kind of computing/processing. That's exactly what drivers/gpu/drm
does (if you ignore the display/modeset side of things) - at the kernel
level gpus have nothing to do with graphics, but all with handling buffer
objects and throwing workloads at some kind of accelerator thing.

Of course that's just my guess of what's going on, after scrolling through
your driver and userspace a bit, I might be completely off. But if my
guess is roughly right, then your driver is internally an rpmsg
driver, but towards userspace it should be a drm driver.

Cheers, Daniel

>
> Thanks,
> Alexandre
>
> > -Daniel
> >
> > > Alexandre Bailon (3):
> > > Add a RPMSG driver for the APU in the mt8183
> > > rpmsg: apu_rpmsg: update the way to store IOMMU mapping
> > > rpmsg: apu_rpmsg: Add an IOCTL to request IOMMU mapping
> > >
> > > Julien STEPHAN (1):
> > > rpmsg: apu_rpmsg: Add support for async apu request
> > >
> > > drivers/rpmsg/Kconfig | 9 +
> > > drivers/rpmsg/Makefile | 1 +
> > > drivers/rpmsg/apu_rpmsg.c | 752 +++++++++++++++++++++++++++++++++
> > > drivers/rpmsg/apu_rpmsg.h | 52 +++
> > > include/uapi/linux/apu_rpmsg.h | 47 +++
> > > 5 files changed, 861 insertions(+)
> > > create mode 100644 drivers/rpmsg/apu_rpmsg.c
> > > create mode 100644 drivers/rpmsg/apu_rpmsg.h
> > > create mode 100644 include/uapi/linux/apu_rpmsg.h
> > >
> > > --
> > > 2.26.2
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > [email protected]
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch