2018-11-30 10:51:24

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 0/6] char: Add support to Qualcomm FastRPC driver

This patchset adds support for the Qualcomm FastRPC driver, which implements
an IPC (Inter-Processor Communication) mechanism that allows clients
to transparently make remote method invocations across processor boundaries.

The below diagram depicts invocation of a single method where the client
and objects reside on different processors. An object could expose
multiple methods which can be grouped together and referred to as an
interface.

: ,--------, ,------, ,-----------, ,------, ,--------,
: | | method | | | | | | method | |
: | Client |------->| Stub |->| Transport |->| Skel |------->| Object |
: | | | | | | | | | |
: `--------` `------` `-----------` `------` `--------`

Client: Linux user mode process that initiates the remote invocation
Stub: Auto generated code linked in with the user mode process that
takes care of marshaling parameters

Transport: Involved in carrying an invocation from a client to an
object. This involves two portions: 1) FastRPC Linux
kernel driver that receives the remote invocation, queues
them up and then waits for the response after signaling the
remote side. 2) Service running on the remote side that
dequeues the messages from the queue and dispatches them for
processing.
Skel: Auto generated code that takes care of un-marshaling
parameters
Object: Method implementation

Most of the work is derived from various downstream Qualcomm kernels.
Credit goes to the various Qualcomm authors who have contributed to this code,
especially Tharun Kumar Merugu <[email protected]>

To keep the first version simple, the following items are left as TODO:
- Support INIT_STATIC process
- Support remote mmap/unmap methods.
- Handling secure compute context banks.
- Add secure buffer support

This patchset has been tested on DB410c(msm8916), DB820c (msm8996), SDM845
Qualcomm SoCs with sample apps like matrix multiply, calculator
from Hexagon SDK.

Thanks,
srini

Srinivas Kandagatla (5):
char: dt-bindings: Add Qualcomm Fastrpc bindings
char: fastrpc: Add Qualcomm fastrpc basic driver model
char: fastrpc: Add support for context Invoke method
char: fastrpc: Add support for create remote init process
char: fastrpc: Add support for dmabuf exporter

Thierry Escande (1):
char: fastrpc: Add support for compat ioctls

.../devicetree/bindings/char/qcom,fastrpc.txt | 73 +
drivers/char/Kconfig | 10 +
drivers/char/Makefile | 1 +
drivers/char/fastrpc.c | 1749 +++++++++++++++++
include/uapi/linux/fastrpc.h | 82 +
5 files changed, 1915 insertions(+)
create mode 100644 Documentation/devicetree/bindings/char/qcom,fastrpc.txt
create mode 100644 drivers/char/fastrpc.c
create mode 100644 include/uapi/linux/fastrpc.h

--
2.19.2



2018-11-30 10:49:00

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 5/6] char: fastrpc: Add support for dmabuf exporter

User processes may need to work with large buffers, and may also pass
buffers from one compute context bank to another compute context bank for
complex DSP algorithms.

This patch makes fastrpc a proper dmabuf exporter so that such buffers
can be shared without making copies of them.

Signed-off-by: Srinivas Kandagatla <[email protected]>
---
drivers/char/fastrpc.c | 173 ++++++++++++++++++++++++++++++++++-
include/uapi/linux/fastrpc.h | 8 ++
2 files changed, 180 insertions(+), 1 deletion(-)

diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
index 3630e883d3f4..3c52502eae9f 100644
--- a/drivers/char/fastrpc.c
+++ b/drivers/char/fastrpc.c
@@ -110,10 +110,20 @@ struct fastrpc_invoke_rsp {

struct fastrpc_buf {
struct fastrpc_user *fl;
+ struct dma_buf *dmabuf;
struct device *dev;
void *virt;
uint64_t phys;
size_t size;
+ /* Lock for dma buf attachments */
+ struct mutex lock;
+ struct list_head attachments;
+};
+
+struct fastrpc_dma_buf_attachment {
+ struct device *dev;
+ struct sg_table sgt;
+ struct list_head node;
};

struct fastrpc_map {
@@ -256,6 +266,9 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
if (!buf)
return -ENOMEM;

+ INIT_LIST_HEAD(&buf->attachments);
+ mutex_init(&buf->lock);
+
buf->fl = fl;
buf->virt = NULL;
buf->phys = 0;
@@ -394,6 +407,109 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
return ERR_PTR(err);
}

+/*
+ * dma_buf_ops.map_dma_buf callback.
+ *
+ * DMA-maps the scatter-gather table stored in the attachment's private
+ * data for the attaching device and returns that table, or
+ * ERR_PTR(-ENOMEM) when dma_map_sg() maps no entries.
+ */
+static struct sg_table *fastrpc_map_dma_buf(struct dma_buf_attachment
+					     *attachment, enum dma_data_direction dir)
+{
+	struct fastrpc_dma_buf_attachment *priv = attachment->priv;
+	struct sg_table *sgt = &priv->sgt;
+	int mapped;
+
+	mapped = dma_map_sg(attachment->dev, sgt->sgl, sgt->nents, dir);
+	if (mapped == 0)
+		return ERR_PTR(-ENOMEM);
+
+	return sgt;
+}
+
+/*
+ * dma_buf_ops.unmap_dma_buf callback.
+ *
+ * Undoes the dma_map_sg() performed by fastrpc_map_dma_buf() so that the
+ * device mapping does not outlive the importer's use of the buffer. The
+ * sg_table itself stays owned by the attachment.
+ */
+static void fastrpc_unmap_dma_buf(struct dma_buf_attachment *attach,
+				  struct sg_table *table,
+				  enum dma_data_direction dir)
+{
+	dma_unmap_sg(attach->dev, table->sgl, table->nents, dir);
+}
+
+/*
+ * dma_buf_ops.release callback: frees the fastrpc_buf stored as the
+ * dma-buf's private data.
+ */
+static void fastrpc_release(struct dma_buf *dmabuf)
+{
+	fastrpc_buf_free(dmabuf->priv);
+}
+
+/*
+ * dma_buf_ops.attach callback.
+ *
+ * Allocates per-attachment state, builds a scatter-gather table describing
+ * the exported coherent buffer and links the attachment into the buffer's
+ * attachment list (under buffer->lock).
+ *
+ * Returns 0 on success, -ENOMEM if the attachment cannot be allocated, or
+ * -EINVAL if the scatter-gather table cannot be built.
+ */
+static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attachment)
+{
+	struct fastrpc_dma_buf_attachment *a;
+	struct fastrpc_buf *buffer = dmabuf->priv;
+	int ret;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	ret = dma_get_sgtable(buffer->dev, &a->sgt, buffer->virt,
+			      FASTRPC_PHYS(buffer->phys), buffer->size);
+	if (ret < 0) {
+		dev_err(buffer->dev, "failed to get scatterlist from DMA API\n");
+		/* Nothing else references the attachment yet; do not leak it. */
+		kfree(a);
+		return -EINVAL;
+	}
+
+	a->dev = attachment->dev;
+	INIT_LIST_HEAD(&a->node);
+	attachment->priv = a;
+
+	mutex_lock(&buffer->lock);
+	list_add(&a->node, &buffer->attachments);
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+/*
+ * dma_buf_ops.detach callback.
+ *
+ * Unlinks the attachment from the buffer's attachment list (under
+ * buffer->lock), releases the scatter-gather table that was built by
+ * fastrpc_dma_buf_attach() and frees the attachment state.
+ */
+static void fastrpc_dma_buf_detatch(struct dma_buf *dmabuf,
+				    struct dma_buf_attachment *attachment)
+{
+	struct fastrpc_dma_buf_attachment *a = attachment->priv;
+	struct fastrpc_buf *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	list_del(&a->node);
+	mutex_unlock(&buffer->lock);
+
+	/* The table was allocated by dma_get_sgtable() at attach time. */
+	sg_free_table(&a->sgt);
+	kfree(a);
+}
+
+/*
+ * dma_buf_ops.map callback: returns the kernel address of page @pgnum
+ * inside the buffer, or NULL when the buffer has no kernel mapping.
+ */
+static void *fastrpc_kmap(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	struct fastrpc_buf *fbuf = dmabuf->priv;
+
+	if (!fbuf->virt)
+		return NULL;
+
+	return fbuf->virt + pgnum * PAGE_SIZE;
+}
+
+/*
+ * dma_buf_ops.vmap callback: the buffer already has a kernel virtual
+ * address, so simply hand it back.
+ */
+static void *fastrpc_vmap(struct dma_buf *dmabuf)
+{
+	struct fastrpc_buf *fbuf = dmabuf->priv;
+	void *kaddr = fbuf->virt;
+
+	return kaddr;
+}
+
+/*
+ * dma_buf_ops.mmap callback.
+ *
+ * Maps the exported coherent buffer into the caller's VMA. The size handed
+ * to dma_mmap_coherent() is the size of the allocation, not the length of
+ * the VMA, so that the DMA layer can check the requested range against the
+ * real buffer; passing the VMA length would make that check meaningless
+ * and let userspace map memory beyond the end of the buffer.
+ *
+ * Returns 0 on success or the negative errno from dma_mmap_coherent().
+ */
+static int fastrpc_mmap(struct dma_buf *dmabuf,
+			struct vm_area_struct *vma)
+{
+	struct fastrpc_buf *buf = dmabuf->priv;
+
+	return dma_mmap_coherent(buf->dev, vma, buf->virt,
+				 FASTRPC_PHYS(buf->phys), buf->size);
+}
+
+static const struct dma_buf_ops fastrpc_dma_buf_ops = {
+ .attach = fastrpc_dma_buf_attach,
+ .detach = fastrpc_dma_buf_detatch,
+ .map_dma_buf = fastrpc_map_dma_buf,
+ .unmap_dma_buf = fastrpc_unmap_dma_buf,
+ .mmap = fastrpc_mmap,
+ .map = fastrpc_kmap,
+ .vmap = fastrpc_vmap,
+ .release = fastrpc_release,
+};
+
static int fastrpc_map_create(struct fastrpc_user *fl, int fd, uintptr_t va,
size_t len, struct fastrpc_map **ppmap)
{
@@ -989,7 +1105,8 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
char __user *argp = (char __user *)arg;
int err;

- if (!fl->sctx) {
+ if (!fl->sctx && cmd != FASTRPC_IOCTL_ALLOC_DMA_BUFF &&
+ cmd != FASTRPC_IOCTL_FREE_DMA_BUFF) {
fl->sctx = fastrpc_session_alloc(cctx, 0);
if (!fl->sctx)
return -ENOENT;
@@ -1027,6 +1144,60 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
goto bail;
}
break;
+
+ case FASTRPC_IOCTL_FREE_DMA_BUFF: {
+ struct dma_buf *buf;
+ uint32_t info;
+
+ err = copy_from_user(&info, argp, sizeof(info));
+ if (err)
+ goto bail;
+
+ buf = dma_buf_get(info);
+ if (IS_ERR_OR_NULL(buf)) {
+ err = -EINVAL;
+ goto bail;
+ }
+ /*
+ * one for the last get and other for the ALLOC_DMA_BUFF ioctl
+ */
+ dma_buf_put(buf);
+ dma_buf_put(buf);
+ }
+ break;
+ case FASTRPC_IOCTL_ALLOC_DMA_BUFF: {
+ struct fastrpc_ioctl_alloc_dma_buf bp;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ struct fastrpc_buf *buf = NULL;
+
+ err = copy_from_user(&bp, argp, sizeof(bp));
+ if (err)
+ goto bail;
+
+ err = fastrpc_buf_alloc(fl, fl->dev, bp.size, &buf);
+ exp_info.ops = &fastrpc_dma_buf_ops;
+ exp_info.size = bp.size;
+ exp_info.flags = O_RDWR;
+ exp_info.priv = buf;
+ buf->dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(buf->dmabuf)) {
+ err = PTR_ERR(buf->dmabuf);
+ goto bail;
+ }
+ get_dma_buf(buf->dmabuf);
+ bp.fd = dma_buf_fd(buf->dmabuf, O_ACCMODE);
+ if (bp.fd < 0) {
+ dma_buf_put(buf->dmabuf);
+ err = -EINVAL;
+ goto bail;
+ }
+
+ err = copy_to_user(argp, &bp, sizeof(bp));
+ if (err)
+ goto bail;
+
+ }
+ break;
default:
err = -ENOTTY;
pr_info("bad ioctl: %d\n", cmd);
diff --git a/include/uapi/linux/fastrpc.h b/include/uapi/linux/fastrpc.h
index 6b596fc7ddf3..6b1ca29867fd 100644
--- a/include/uapi/linux/fastrpc.h
+++ b/include/uapi/linux/fastrpc.h
@@ -5,6 +5,8 @@

#include <linux/types.h>

+#define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_ioctl_alloc_dma_buf)
+#define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, uint32_t)
#define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_ioctl_invoke)
#define FASTRPC_IOCTL_INIT _IOWR('R', 4, struct fastrpc_ioctl_init)

@@ -71,4 +73,10 @@ struct fastrpc_ioctl_init {
unsigned int siglen;
};

+struct fastrpc_ioctl_alloc_dma_buf {
+ int fd; /* fd */
+ ssize_t size; /* size */
+ uint32_t flags; /* flags to map with */
+};
+
#endif /* __QCOM_FASTRPC_H__ */
--
2.19.2


2018-11-30 10:49:10

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 6/6] char: fastrpc: Add support for compat ioctls

From: Thierry Escande <[email protected]>

This patch adds support for compat ioctls issued by 32-bit userland to
the Qualcomm fastrpc driver.

Supported ioctls in this change are INIT, INVOKE, and ALLOC/FREE_DMA.

Most of the work is derived from various downstream Qualcomm kernels.
Credit goes to the various Qualcomm authors who have contributed to this code,
especially Tharun Kumar Merugu <[email protected]>

Signed-off-by: Thierry Escande <[email protected]>
Signed-off-by: Srinivas Kandagatla <[email protected]>
---
drivers/char/fastrpc.c | 279 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 279 insertions(+)

diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
index 3c52502eae9f..0b99a13109ea 100644
--- a/drivers/char/fastrpc.c
+++ b/drivers/char/fastrpc.c
@@ -3,6 +3,7 @@
// Copyright (c) 2018, Linaro Limited

#include <linux/cdev.h>
+#include <linux/compat.h>
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
@@ -1207,10 +1208,288 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
return err;
}

+#ifdef CONFIG_COMPAT
+
+#define FASTRPC_COMPAT_IOCTL_ALLOC_DMA_BUFF \
+ _IOWR('R', 1, struct fastrpc_compat_ioctl_alloc_dma_buf)
+#define FASTRPC_COMPAT_IOCTL_FREE_DMA_BUFF \
+ _IOWR('R', 2, uint32_t)
+#define FASTRPC_COMPAT_IOCTL_INVOKE \
+ _IOWR('R', 3, struct fastrpc_compat_ioctl_invoke)
+#define FASTRPC_COMPAT_IOCTL_INIT \
+ _IOWR('R', 4, struct fastrpc_compat_ioctl_init)
+
+struct compat_remote_buf {
+ compat_uptr_t pv; /* buffer pointer */
+ compat_size_t len; /* length of buffer */
+};
+
+union compat_remote_arg {
+ struct compat_remote_buf buf;
+ compat_uint_t h;
+};
+
+struct fastrpc_compat_ioctl_alloc_dma_buf {
+ compat_int_t fd;
+ compat_ssize_t size;
+ compat_uint_t flags;
+};
+
+struct fastrpc_compat_ioctl_invoke {
+ compat_uint_t handle; /* remote handle */
+ compat_uint_t sc; /* scalars describing the data */
+ compat_uptr_t pra; /* remote arguments list */
+ compat_uptr_t fds; /* fd list */
+ compat_uptr_t attrs; /* attribute list */
+ compat_uptr_t crc; /* crc list */
+};
+
+struct fastrpc_compat_ioctl_init {
+ compat_uint_t flags; /* one of FASTRPC_INIT_* macros */
+ compat_uptr_t file; /* pointer to elf file */
+ compat_int_t filelen; /* elf file length */
+ compat_int_t filefd; /* dmabuf fd for the file */
+ compat_uptr_t mem; /* mem for the PD */
+ compat_int_t memlen; /* mem length */
+ compat_int_t memfd; /* dmabuf fd for the mem */
+ compat_int_t attrs; /* attributes to init process */
+ compat_int_t siglen; /* test signature file length */
+};
+
+/*
+ * Translate a 32-bit ALLOC_DMA_BUFF request into the native layout.
+ *
+ * @buf32: request as supplied by the compat caller
+ * @buf:   native request being built in user space
+ *
+ * The fd field is an output of the ioctl and is therefore just cleared.
+ * The size is read through a signed compat type, matching the declaration
+ * of the field in struct fastrpc_compat_ioctl_alloc_dma_buf, so that it is
+ * sign-extended rather than zero-extended when widened to the native field.
+ *
+ * Returns 0 on success or a non-zero value if any user access faulted.
+ */
+static int fastrpc_compat_get_ioctl_alloc_dma_buf(
+		struct fastrpc_compat_ioctl_alloc_dma_buf __user *buf32,
+		struct fastrpc_ioctl_alloc_dma_buf __user *buf)
+{
+	compat_ssize_t size;
+	compat_uint_t flags;
+	int err;
+
+	err = put_user(0, &buf->fd);
+	err |= get_user(size, &buf32->size);
+	err |= put_user(size, &buf->size);
+	err |= get_user(flags, &buf32->flags);
+	err |= put_user(flags, &buf->flags);
+
+	return err;
+}
+
+/*
+ * Copy the result of a native ALLOC_DMA_BUFF ioctl (the fd) back into the
+ * 32-bit request structure of the compat caller.
+ *
+ * Returns 0 on success or a non-zero value if a user access faulted.
+ */
+static int fastrpc_compat_put_ioctl_alloc_dma_buf(
+		struct fastrpc_compat_ioctl_alloc_dma_buf __user *buf32,
+		struct fastrpc_ioctl_alloc_dma_buf __user *buf)
+{
+	compat_int_t new_fd;
+	int rc_get, rc_put;
+
+	rc_get = get_user(new_fd, &buf->fd);
+	rc_put = put_user(new_fd, &buf32->fd);
+
+	return rc_get | rc_put;
+}
+
+/*
+ * Build a native fastrpc_ioctl_invoke request from a 32-bit one.
+ *
+ * @inv32: request as supplied by the compat caller
+ * @inva:  on return, points to the native request, which is allocated in
+ *         user space together with the native remote-argument array that
+ *         immediately follows it
+ *
+ * Every field of @inv32 lives in user memory and is therefore read with
+ * get_user(); it must never be dereferenced directly. The optional
+ * fds/attrs/crc pointers are converted with compat_ptr(), which yields
+ * NULL for a zero compat pointer, so one full-width store per field is
+ * enough.
+ *
+ * Returns 0 on success, -EFAULT if the user-space allocation fails, or a
+ * non-zero value if any user access faulted.
+ */
+static int compat_get_fastrpc_ioctl_invoke(
+			struct fastrpc_compat_ioctl_invoke __user *inv32,
+			struct fastrpc_ioctl_invoke __user **inva)
+{
+	compat_uint_t u, sc;
+	compat_size_t s;
+	compat_uptr_t p;
+	struct fastrpc_ioctl_invoke *inv;
+	union compat_remote_arg *pra32;
+	union remote_arg *pra;
+	int err, len, j;
+
+	err = get_user(sc, &inv32->sc);
+	if (err)
+		return err;
+
+	len = REMOTE_SCALARS_LENGTH(sc);
+	inv = compat_alloc_user_space(sizeof(*inv) + len * sizeof(*pra));
+	if (!inv)
+		return -EFAULT;
+
+	/* The native argument array is placed right behind the request. */
+	pra = (union remote_arg *)(inv + 1);
+	err = put_user(pra, &inv->pra);
+	err |= put_user(sc, &inv->sc);
+	err |= get_user(u, &inv32->handle);
+	err |= put_user(u, &inv->handle);
+	err |= get_user(p, &inv32->pra);
+	if (err)
+		return err;
+
+	pra32 = compat_ptr(p);
+	for (j = 0; j < len; j++) {
+		err |= get_user(p, &pra32[j].buf.pv);
+		err |= put_user(p, (uintptr_t *)&pra[j].buf.pv);
+		err |= get_user(s, &pra32[j].buf.len);
+		err |= put_user(s, &pra[j].buf.len);
+	}
+
+	/* get_user() leaves p at zero on a fault, so NULL is stored then. */
+	err |= get_user(p, &inv32->fds);
+	err |= put_user(compat_ptr(p), &inv->fds);
+
+	err |= get_user(p, &inv32->attrs);
+	err |= put_user(compat_ptr(p), &inv->attrs);
+
+	err |= get_user(p, &inv32->crc);
+	err |= put_user(compat_ptr(p), (void __user **)&inv->crc);
+
+	*inva = inv;
+
+	return err;
+}
+
+/*
+ * Translate a 32-bit fastrpc_ioctl_init request into the native layout.
+ *
+ * @init32: request as supplied by the compat caller
+ * @init:   native request being built in user space
+ *
+ * All fields of @init32 are in user memory and are read with get_user()
+ * only. For attrs and siglen the original "store 0, then overwrite when
+ * non-zero" sequence is equivalent to copying the value unconditionally,
+ * because get_user() leaves the destination at zero on a fault.
+ *
+ * Returns 0 on success or a non-zero value if any user access faulted.
+ */
+static int compat_get_fastrpc_ioctl_init(
+				struct fastrpc_compat_ioctl_init __user *init32,
+				struct fastrpc_ioctl_init __user *init)
+{
+	compat_uint_t u;
+	compat_uptr_t p;
+	compat_int_t i;
+	int err;
+
+	err = get_user(u, &init32->flags);
+	err |= put_user(u, &init->flags);
+	err |= get_user(p, &init32->file);
+	err |= put_user(p, &init->file);
+	err |= get_user(i, &init32->filelen);
+	err |= put_user(i, &init->filelen);
+	err |= get_user(i, &init32->filefd);
+	err |= put_user(i, &init->filefd);
+	err |= get_user(p, &init32->mem);
+	err |= put_user(p, &init->mem);
+	err |= get_user(i, &init32->memlen);
+	err |= put_user(i, &init->memlen);
+	err |= get_user(i, &init32->memfd);
+	err |= put_user(i, &init->memfd);
+
+	err |= get_user(i, &init32->attrs);
+	err |= put_user(i, &init->attrs);
+
+	err |= get_user(i, &init32->siglen);
+	err |= put_user(i, &init->siglen);
+
+	return err;
+}
+
+static long fastrpc_compat_device_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ int err;
+
+ if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+ return -ENOTTY;
+
+ switch (cmd) {
+ case FASTRPC_COMPAT_IOCTL_ALLOC_DMA_BUFF: {
+ struct fastrpc_compat_ioctl_alloc_dma_buf __user *buf32;
+ struct fastrpc_ioctl_alloc_dma_buf __user *buf;
+
+ buf32 = compat_ptr(arg);
+ buf = compat_alloc_user_space(sizeof(*buf));
+ if (!buf) {
+ err = -EFAULT;
+ break;
+ }
+
+ err = fastrpc_compat_get_ioctl_alloc_dma_buf(buf32, buf);
+ if (err)
+ break;
+
+ err = filp->f_op->unlocked_ioctl(filp,
+ FASTRPC_IOCTL_ALLOC_DMA_BUFF,
+ (unsigned long)buf);
+ if (err)
+ break;
+
+ err = fastrpc_compat_put_ioctl_alloc_dma_buf(buf32, buf);
+ break;
+ }
+ case FASTRPC_COMPAT_IOCTL_FREE_DMA_BUFF: {
+ compat_uptr_t __user *info32;
+ uint32_t __user *info;
+ compat_uint_t u;
+
+ info32 = compat_ptr(arg);
+ info = compat_alloc_user_space(sizeof(*info));
+ if (!info) {
+ err = -EFAULT;
+ break;
+ }
+
+ err = get_user(u, info32);
+ err |= put_user(u, info);
+ if (err)
+ break;
+
+ err = filp->f_op->unlocked_ioctl(filp,
+ FASTRPC_IOCTL_FREE_DMA_BUFF,
+ (unsigned long)info);
+ break;
+ }
+ case FASTRPC_COMPAT_IOCTL_INVOKE: {
+ struct fastrpc_compat_ioctl_invoke __user *inv32;
+ struct fastrpc_ioctl_invoke __user *inv;
+
+ inv32 = compat_ptr(arg);
+
+ err = compat_get_fastrpc_ioctl_invoke(inv32, &inv);
+ if (err)
+ break;
+
+ err = filp->f_op->unlocked_ioctl(filp,
+ FASTRPC_IOCTL_INVOKE, (unsigned long)inv);
+ break;
+ }
+ case FASTRPC_COMPAT_IOCTL_INIT: {
+ struct fastrpc_compat_ioctl_init __user *init32;
+ struct fastrpc_ioctl_init __user *init;
+
+ init32 = compat_ptr(arg);
+ init = compat_alloc_user_space(sizeof(*init));
+ if (!init)
+ return -EFAULT;
+
+ err = compat_get_fastrpc_ioctl_init(init32, init);
+ if (err)
+ return err;
+
+ err = filp->f_op->unlocked_ioctl(filp, FASTRPC_IOCTL_INIT,
+ (unsigned long)init);
+ break;
+ }
+ default:
+ err = -ENOTTY;
+ pr_info("bad ioctl: %d\n", cmd);
+ break;
+ }
+
+ return err;
+}
+#endif /* CONFIG_COMPAT */
+
static const struct file_operations fastrpc_fops = {
.open = fastrpc_device_open,
.release = fastrpc_device_release,
.unlocked_ioctl = fastrpc_device_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = fastrpc_compat_device_ioctl,
+#endif
};

static int fastrpc_cb_probe(struct platform_device *pdev)
--
2.19.2


2018-11-30 10:49:18

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method

This patch adds support for the compute context invoke method
on the remote processor (DSP).
This involves setting up the function's input and output arguments and
input and output handles, and mapping the dmabuf fds for the
argument/handle buffers.

Most of the work is derived from various downstream Qualcomm kernels.
Credit goes to the various Qualcomm authors who have contributed to this code,
especially Tharun Kumar Merugu <[email protected]>

Signed-off-by: Srinivas Kandagatla <[email protected]>
---
drivers/char/fastrpc.c | 790 +++++++++++++++++++++++++++++++++++
include/uapi/linux/fastrpc.h | 56 +++
2 files changed, 846 insertions(+)
create mode 100644 include/uapi/linux/fastrpc.h

diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
index 97d8062eb3e1..5bb224adc24f 100644
--- a/drivers/char/fastrpc.c
+++ b/drivers/char/fastrpc.c
@@ -3,7 +3,9 @@
// Copyright (c) 2018, Linaro Limited

#include <linux/cdev.h>
+#include <linux/completion.h>
#include <linux/device.h>
+#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include <linux/list.h>
@@ -14,6 +16,7 @@
#include <linux/rpmsg.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <uapi/linux/fastrpc.h>

#define ADSP_DOMAIN_ID (0)
#define MDSP_DOMAIN_ID (1)
@@ -21,10 +24,41 @@
#define CDSP_DOMAIN_ID (3)
#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/
#define FASTRPC_MAX_SESSIONS 9 /*8 compute, 1 cpz*/
+#define FASTRPC_ALIGN 128
+#define FASTRPC_MAX_FDLIST 16
+#define FASTRPC_MAX_CRCLIST 64
+/* Low 32 bits of a 64-bit address value; argument parenthesized so any expression is safe */
+#define FASTRPC_PHYS(p)	((p) & 0xffffffff)
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_CTXID_MASK (0xFF0)
#define FASTRPC_DEVICE_NAME "fastrpc"

+/* Retrieves the number of input buffers from the scalars parameter */
+#define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff)
+
+/* Retrieves the number of output buffers from the scalars parameter */
+#define REMOTE_SCALARS_OUTBUFS(sc) (((sc) >> 8) & 0x0ff)
+
+/* Retrieves the number of input handles from the scalars parameter */
+#define REMOTE_SCALARS_INHANDLES(sc) (((sc) >> 4) & 0x0f)
+
+/* Retrieves the number of output handles from the scalars parameter */
+#define REMOTE_SCALARS_OUTHANDLES(sc) ((sc) & 0x0f)
+
+#define REMOTE_SCALARS_LENGTH(sc) (REMOTE_SCALARS_INBUFS(sc) +\
+ REMOTE_SCALARS_OUTBUFS(sc) +\
+ REMOTE_SCALARS_INHANDLES(sc) +\
+ REMOTE_SCALARS_OUTHANDLES(sc))
+
+#define FASTRPC_BUILD_SCALARS(attr, method, in, out, oin, oout) \
+ ((((uint32_t) (attr) & 0x7) << 29) | \
+ (((uint32_t) (method) & 0x1f) << 24) | \
+ (((uint32_t) (in) & 0xff) << 16) | \
+ (((uint32_t) (out) & 0xff) << 8) | \
+ (((uint32_t) (oin) & 0x0f) << 4) | \
+ ((uint32_t) (oout) & 0x0f))
+
+#define FASTRPC_SCALARS(method, in, out) \
+ FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0)
#define cdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, cdev)

static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
@@ -32,6 +66,82 @@ static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
static dev_t fastrpc_major;
static struct class *fastrpc_class;

+struct fastrpc_invoke_header {
+ uint64_t ctx; /* invoke caller context */
+ uint32_t handle; /* handle to invoke */
+ uint32_t sc; /* scalars structure describing the data */
+};
+
+struct fastrpc_phy_page {
+ uint64_t addr; /* physical address */
+ uint64_t size; /* size of contiguous region */
+};
+
+struct fastrpc_invoke_buf {
+ int num; /* number of contiguous regions */
+ int pgidx; /* index to start of contiguous region */
+};
+
+struct fastrpc_invoke {
+ struct fastrpc_invoke_header header;
+ struct fastrpc_phy_page page; /* list of pages address */
+};
+
+struct fastrpc_msg {
+ uint32_t pid; /* process group id */
+ uint32_t tid; /* thread id */
+ struct fastrpc_invoke invoke;
+};
+
+struct fastrpc_invoke_rsp {
+ uint64_t ctx; /* invoke caller context */
+ int retval; /* invoke return value */
+};
+
+struct fastrpc_buf {
+ struct fastrpc_user *fl;
+ struct device *dev;
+ void *virt;
+ uint64_t phys;
+ size_t size;
+};
+
+struct fastrpc_map {
+ struct list_head node;
+ struct fastrpc_user *fl;
+ int fd;
+ struct dma_buf *buf;
+ struct sg_table *table;
+ struct dma_buf_attachment *attach;
+ uint64_t phys;
+ size_t size;
+ uintptr_t va;
+ size_t len;
+ struct kref refcount;
+};
+
+struct fastrpc_invoke_ctx {
+ struct fastrpc_user *fl;
+ struct list_head node; /* list of ctxs */
+ struct completion work;
+ int retval;
+ int pid;
+ int tgid;
+ uint32_t sc;
+ struct fastrpc_msg msg;
+ uint64_t ctxid;
+ size_t used_sz;
+
+ remote_arg_t *lpra;
+ unsigned int *attrs;
+ int *fds;
+ uint32_t *crc;
+
+ remote_arg64_t *rpra;
+ struct fastrpc_map **maps;
+ struct fastrpc_buf *buf;
+};
+
struct fastrpc_session_ctx {
struct device *dev;
int sid;
@@ -59,6 +169,7 @@ struct fastrpc_user {

struct fastrpc_channel_ctx *cctx;
struct fastrpc_session_ctx *sctx;
+ struct fastrpc_buf *init_mem;

int tgid;
int pd;
@@ -69,6 +180,590 @@ struct fastrpc_user {
struct device *dev;
};

+static void fastrpc_free_map(struct kref *ref)
+{
+ struct fastrpc_map *map;
+
+ map = container_of(ref, struct fastrpc_map, refcount);
+
+ list_del(&map->node);
+
+ if (map->table) {
+ dma_buf_unmap_attachment(map->attach, map->table,
+ DMA_BIDIRECTIONAL);
+ dma_buf_detach(map->buf, map->attach);
+ dma_buf_put(map->buf);
+ }
+
+ kfree(map);
+}
+
+/*
+ * Drop one reference on @map (NULL is accepted and ignored). The owning
+ * user's mutex is held across the put because the release function,
+ * fastrpc_free_map(), unlinks the map from a list.
+ */
+static void fastrpc_map_put(struct fastrpc_map *map)
+{
+	struct fastrpc_user *owner;
+
+	if (!map)
+		return;
+
+	owner = map->fl;
+	mutex_lock(&owner->mutex);
+	kref_put(&map->refcount, fastrpc_free_map);
+	mutex_unlock(&owner->mutex);
+}
+
+/*
+ * Look up an existing map for @fd in @fl's map list.
+ *
+ * On a hit a reference is taken, the map is stored in *@ppmap and 0 is
+ * returned; otherwise -ENOENT is returned and *@ppmap is left untouched.
+ * @va and @len are currently not used for matching.
+ */
+static int fastrpc_map_get(struct fastrpc_user *fl, int fd,
+			   uintptr_t va, size_t len,
+			   struct fastrpc_map **ppmap)
+{
+	struct fastrpc_map *cur;
+	int ret = -ENOENT;
+
+	mutex_lock(&fl->mutex);
+	list_for_each_entry(cur, &fl->maps, node) {
+		if (cur->fd != fd)
+			continue;
+
+		kref_get(&cur->refcount);
+		*ppmap = cur;
+		ret = 0;
+		break;
+	}
+	mutex_unlock(&fl->mutex);
+
+	return ret;
+}
+
+static void fastrpc_buf_free(struct fastrpc_buf *buf)
+{
+ dma_free_coherent(buf->dev, buf->size, buf->virt,
+ FASTRPC_PHYS(buf->phys));
+ kfree(buf);
+}
+
+/*
+ * Allocate a DMA-coherent buffer of @size bytes on @dev for user @fl.
+ *
+ * On success the new descriptor is stored in *@obuf and 0 is returned.
+ * When the user has a session with a non-zero sid, the sid is placed in
+ * the upper 32 bits of buf->phys. Returns -ENOMEM if either the descriptor
+ * or the coherent memory cannot be allocated; nothing is leaked in that
+ * case and *@obuf is not written.
+ */
+static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
+			     size_t size, struct fastrpc_buf **obuf)
+{
+	struct fastrpc_buf *buf;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->fl = fl;
+	buf->virt = NULL;
+	buf->phys = 0;
+	buf->size = size;
+	buf->dev = dev;
+
+	buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys,
+				       GFP_KERNEL);
+	if (!buf->virt) {
+		/* The descriptor was never published; release it here. */
+		kfree(buf);
+		return -ENOMEM;
+	}
+
+	if (fl->sctx && fl->sctx->sid)
+		buf->phys += ((uint64_t)fl->sctx->sid << 32);
+
+	*obuf = buf;
+
+	return 0;
+}
+
+static void fastrpc_context_free(struct fastrpc_invoke_ctx *ctx)
+{
+ struct fastrpc_channel_ctx *cctx = ctx->fl->cctx;
+ struct fastrpc_user *user = ctx->fl;
+ int scalars = REMOTE_SCALARS_LENGTH(ctx->sc);
+ int i;
+
+ spin_lock(&user->lock);
+ list_del(&ctx->node);
+ spin_unlock(&user->lock);
+
+ for (i = 0; i < scalars; i++) {
+ if (ctx->maps[i])
+ fastrpc_map_put(ctx->maps[i]);
+ }
+
+ if (ctx->buf)
+ fastrpc_buf_free(ctx->buf);
+
+ spin_lock(&cctx->lock);
+ idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
+ spin_unlock(&cctx->lock);
+
+ kfree(ctx);
+}
+
+static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
+ struct fastrpc_user *user,
+ uint32_t kernel,
+ struct fastrpc_ioctl_invoke *inv)
+{
+ struct fastrpc_channel_ctx *cctx = user->cctx;
+ struct fastrpc_invoke_ctx *ctx = NULL;
+ int bufs, size, ret;
+ int err = 0;
+
+ bufs = REMOTE_SCALARS_LENGTH(inv->sc);
+ size = (sizeof(*ctx->lpra) + sizeof(*ctx->maps) +
+ sizeof(*ctx->fds) + sizeof(*ctx->attrs)) * bufs;
+
+ ctx = kzalloc(sizeof(*ctx) + size, GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&ctx->node);
+ ctx->fl = user;
+ ctx->maps = (struct fastrpc_map **)(&ctx[1]);
+ ctx->lpra = (remote_arg_t *)(&ctx->maps[bufs]);
+ ctx->fds = (int *)(&ctx->lpra[bufs]);
+ ctx->attrs = (unsigned int *)(&ctx->fds[bufs]);
+
+ if (!kernel) {
+ if (copy_from_user(ctx->lpra,
+ (void const __user *)inv->pra,
+ bufs * sizeof(*ctx->lpra))) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (inv->fds) {
+ if (copy_from_user(ctx->fds,
+ (void const __user *)inv->fds,
+ bufs * sizeof(*ctx->fds))) {
+ err = -EFAULT;
+ goto err;
+ }
+ }
+ if (inv->attrs) {
+ if (copy_from_user(
+ ctx->attrs,
+ (void const __user *)inv->attrs,
+ bufs * sizeof(*ctx->attrs))) {
+ err = -EFAULT;
+ goto err;
+ }
+ }
+ } else {
+ memcpy(ctx->lpra, inv->pra, bufs * sizeof(*ctx->lpra));
+ if (inv->fds)
+ memcpy(ctx->fds, inv->fds,
+ bufs * sizeof(*ctx->fds));
+ if (inv->attrs)
+ memcpy(ctx->attrs, inv->attrs,
+ bufs * sizeof(*ctx->attrs));
+ }
+
+ ctx->crc = (uint32_t *)inv->crc;
+ ctx->sc = inv->sc;
+ ctx->retval = -1;
+ ctx->pid = current->pid;
+ ctx->tgid = user->tgid;
+ init_completion(&ctx->work);
+
+ spin_lock(&user->lock);
+ list_add_tail(&ctx->node, &user->pending);
+ spin_unlock(&user->lock);
+
+ spin_lock(&cctx->lock);
+ ret = idr_alloc_cyclic(&cctx->ctx_idr, ctx, 1,
+ FASTRPC_CTX_MAX, GFP_ATOMIC);
+ if (ret < 0) {
+ spin_unlock(&cctx->lock);
+ err = ret;
+ goto err_idr;
+ }
+ ctx->ctxid = ret << 4;
+ spin_unlock(&cctx->lock);
+
+ return ctx;
+err_idr:
+ spin_lock(&user->lock);
+ list_del(&ctx->node);
+ spin_unlock(&user->lock);
+err:
+ kfree(ctx);
+
+ return ERR_PTR(err);
+}
+
+/*
+ * Get or create the DMA mapping of dma-buf @fd for user @fl.
+ *
+ * If a map for @fd already exists, a reference to it is returned.
+ * Otherwise the dma-buf is looked up, attached to the session device and
+ * mapped, and the resulting map (holding one reference) is added to
+ * fl->maps.
+ *
+ * The list insertion is done under fl->mutex, the same lock that
+ * fastrpc_map_get() and fastrpc_map_put() hold while walking or unlinking
+ * entries of that list.
+ *
+ * Returns 0 with *@ppmap set on success, or a negative errno.
+ */
+static int fastrpc_map_create(struct fastrpc_user *fl, int fd, uintptr_t va,
+			      size_t len, struct fastrpc_map **ppmap)
+{
+	struct fastrpc_session_ctx *sess = fl->sctx;
+	struct fastrpc_map *map = NULL;
+	int err = 0;
+
+	if (!fastrpc_map_get(fl, fd, va, len, ppmap))
+		return 0;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&map->node);
+	map->fl = fl;
+	map->fd = fd;
+	map->buf = dma_buf_get(fd);
+	/* dma_buf_get() reports failure with an ERR_PTR, never with NULL. */
+	if (IS_ERR(map->buf)) {
+		err = PTR_ERR(map->buf);
+		goto get_err;
+	}
+
+	map->attach = dma_buf_attach(map->buf, sess->dev);
+	if (IS_ERR(map->attach)) {
+		dev_err(sess->dev, "Failed to attach dmabuf\n");
+		err = PTR_ERR(map->attach);
+		goto attach_err;
+	}
+
+	map->table = dma_buf_map_attachment(map->attach,
+					    DMA_BIDIRECTIONAL);
+	if (IS_ERR(map->table)) {
+		err = PTR_ERR(map->table);
+		goto map_err;
+	}
+
+	map->phys = sg_dma_address(map->table->sgl);
+	map->phys += ((uint64_t)fl->sctx->sid << 32);
+	map->size = len;
+	map->va = (uintptr_t)sg_virt(map->table->sgl);
+	map->len = len;
+	kref_init(&map->refcount);
+
+	mutex_lock(&fl->mutex);
+	list_add_tail(&map->node, &fl->maps);
+	mutex_unlock(&fl->mutex);
+	*ppmap = map;
+
+	return 0;
+
+map_err:
+	dma_buf_detach(map->buf, map->attach);
+attach_err:
+	dma_buf_put(map->buf);
+get_err:
+	kfree(map);
+
+	return err;
+}
+
+static inline struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(
+ remote_arg64_t *pra,
+ uint32_t sc)
+{
+ return (struct fastrpc_invoke_buf *)(&pra[REMOTE_SCALARS_LENGTH(sc)]);
+}
+
+static inline struct fastrpc_phy_page *fastrpc_phy_page_start(uint32_t sc,
+ struct fastrpc_invoke_buf *buf)
+{
+ return (struct fastrpc_phy_page *)(&buf[REMOTE_SCALARS_LENGTH(sc)]);
+}
+
+/*
+ * Marshal the arguments of an invocation into the context's DMA buffer.
+ *
+ * @kernel: non-zero when the argument pointers are kernel addresses
+ * @ctx:    invoke context whose lpra/fds arrays describe the arguments
+ *
+ * Layout of the buffer that is built here:
+ *   remote_arg64_t[bufs + handles]
+ *   struct fastrpc_invoke_buf[bufs + handles]
+ *   struct fastrpc_phy_page[bufs + handles]
+ *   uint64_t fdlist[FASTRPC_MAX_FDLIST]
+ *   uint32_t crclist[FASTRPC_MAX_CRCLIST]
+ *   payload of every buffer argument that has no dma-buf mapping, each
+ *   aligned to FASTRPC_ALIGN
+ *
+ * Buffer arguments with a valid fd are mapped instead of copied; a failed
+ * mapping of a buffer is tolerated and the data is copied instead. Handle
+ * arguments must map successfully.
+ *
+ * Returns 0 on success, -EFAULT if an input buffer cannot be read from
+ * user space, -EINVAL if the payload area turns out to be too small, or
+ * the error of a failed handle mapping or buffer allocation.
+ */
+static int fastrpc_get_args(uint32_t kernel, struct fastrpc_invoke_ctx *ctx)
+{
+	remote_arg64_t *rpra;
+	remote_arg_t *lpra = ctx->lpra;
+	struct fastrpc_invoke_buf *list;
+	struct fastrpc_phy_page *pages;
+	uint32_t sc = ctx->sc;
+	uintptr_t args;
+	size_t rlen = 0, copylen = 0, metalen = 0;
+	int inbufs, handles, bufs, i, err = 0;
+	uint64_t *fdlist;
+	uint32_t *crclist;
+
+	inbufs = REMOTE_SCALARS_INBUFS(sc);
+	bufs = inbufs + REMOTE_SCALARS_OUTBUFS(sc);
+	handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
+	metalen = (bufs + handles) * (sizeof(remote_arg64_t) +
+				      sizeof(struct fastrpc_invoke_buf) +
+				      sizeof(struct fastrpc_phy_page)) +
+		  sizeof(uint64_t) * FASTRPC_MAX_FDLIST +
+		  sizeof(uint32_t) * FASTRPC_MAX_CRCLIST;
+
+	copylen = metalen;
+
+	/* Pass 1: map what can be mapped and size the copy area. */
+	for (i = 0; i < bufs + handles; ++i) {
+		uintptr_t buf = (uintptr_t)lpra[i].buf.pv;
+		size_t len = lpra[i].buf.len;
+
+		if (i < bufs) {
+			/* Best effort: on failure the buffer is copied. */
+			if (ctx->fds[i] && (ctx->fds[i] != -1))
+				fastrpc_map_create(ctx->fl, ctx->fds[i], buf,
+						   len, &ctx->maps[i]);
+
+			if (!len)
+				continue;
+
+			if (ctx->maps[i])
+				continue;
+
+			copylen = ALIGN(copylen, FASTRPC_ALIGN);
+			copylen += len;
+		} else {
+			err = fastrpc_map_create(ctx->fl, ctx->fds[i], 0,
+						 0, &ctx->maps[i]);
+			if (err)
+				goto bail;
+		}
+	}
+	ctx->used_sz = copylen;
+
+	/* allocate new buffer */
+	if (copylen) {
+		err = fastrpc_buf_alloc(ctx->fl, ctx->fl->sctx->dev,
+					copylen, &ctx->buf);
+		if (err)
+			goto bail;
+	}
+
+	/* copy metadata */
+	rpra = ctx->buf->virt;
+	ctx->rpra = rpra;
+	list = fastrpc_invoke_buf_start(rpra, sc);
+	pages = fastrpc_phy_page_start(sc, list);
+	args = (uintptr_t)ctx->buf->virt + metalen;
+	fdlist = (uint64_t *)&pages[bufs + handles];
+	/* fdlist entries are 64 bits wide; clear the whole array. */
+	memset(fdlist, 0, sizeof(uint64_t) * FASTRPC_MAX_FDLIST);
+	crclist = (uint32_t *)&fdlist[FASTRPC_MAX_FDLIST];
+	memset(crclist, 0, sizeof(uint32_t) * FASTRPC_MAX_CRCLIST);
+	rlen = copylen - metalen;
+
+	/* Pass 2: describe every buffer argument and copy the inputs. */
+	for (i = 0; i < bufs; ++i) {
+		struct fastrpc_map *map = ctx->maps[i];
+		size_t len = lpra[i].buf.len;
+		size_t mlen;
+
+		if (len)
+			list[i].num = 1;
+		else
+			list[i].num = 0;
+
+		list[i].pgidx = i;
+
+		rpra[i].buf.pv = 0;
+		rpra[i].buf.len = len;
+		if (!len)
+			continue;
+		if (map) {
+			uintptr_t offset = 0;
+			uint64_t num = roundup(len,
+					       PAGE_SIZE) / PAGE_SIZE;
+			int idx = list[i].pgidx;
+
+			pages[idx].addr = map->phys + offset;
+			pages[idx].size = num << PAGE_SHIFT;
+			rpra[i].buf.pv =
+				(uint64_t)((uintptr_t)lpra[i].buf.pv);
+		} else {
+			rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
+			args = ALIGN(args, FASTRPC_ALIGN);
+			mlen = len;
+			if (rlen < mlen) {
+				/* Must not be reported as success. */
+				err = -EINVAL;
+				goto bail;
+			}
+
+			rpra[i].buf.pv = (args);
+			pages[list[i].pgidx].addr = ctx->buf->phys +
+							(copylen - rlen);
+			pages[list[i].pgidx].addr = pages[list[i].pgidx].addr &
+							PAGE_MASK;
+			pages[list[i].pgidx].size = roundup(len, PAGE_SIZE);
+
+			if (i < inbufs) {
+				if (!kernel) {
+					/*
+					 * copy_from_user() returns the number
+					 * of bytes left, not an errno.
+					 */
+					if (copy_from_user(
+						(void *)rpra[i].buf.pv,
+						(void const __user *)lpra[i].buf.pv,
+						len)) {
+						err = -EFAULT;
+						goto bail;
+					}
+				} else {
+					memcpy((void *)rpra[i].buf.pv,
+					       lpra[i].buf.pv, len);
+				}
+			}
+			args = args + mlen;
+			rlen -= mlen;
+		}
+	}
+
+	/* Handle arguments occupy the indices bufs .. bufs + handles - 1. */
+	for (i = bufs; i < bufs + handles; ++i) {
+		struct fastrpc_map *map = ctx->maps[i];
+		size_t len = lpra[i].buf.len;
+
+		if (len)
+			list[i].num = 1;
+		else
+			list[i].num = 0;
+
+		list[i].pgidx = i;
+
+		pages[i].addr = map->phys;
+		pages[i].size = map->size;
+		rpra[i].dma.fd = ctx->fds[i];
+		rpra[i].dma.len = len;
+		rpra[i].dma.offset = (uint32_t)(uintptr_t)lpra[i].buf.pv;
+	}
+
+bail:
+	return err;
+}
+
+/*
+ * Copy the results of a completed invocation back to the caller.
+ *
+ * @ctx:    invoke context that was marshalled by fastrpc_get_args()
+ * @kernel: non-zero when the destination pointers are kernel addresses
+ * @upra:   caller's argument array (not used here; the copy kept in
+ *          ctx->lpra is used instead)
+ *
+ * Output buffers without a dma-buf mapping are copied out of the invoke
+ * buffer; mapped output buffers just have their map reference dropped.
+ * One reference is then dropped for every map whose fd appears in the
+ * fdlist area, and the CRC list is copied out when the caller supplied a
+ * CRC pointer.
+ *
+ * Returns 0 on success or -EFAULT if a copy to user space fails.
+ */
+static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
+			    uint32_t kernel, remote_arg_t *upra)
+{
+	remote_arg64_t *rpra = ctx->rpra;
+	int i, inbufs, outbufs, handles;
+	struct fastrpc_invoke_buf *list;
+	struct fastrpc_phy_page *pages;
+	struct fastrpc_map *mmap;
+	uint32_t sc = ctx->sc;
+	uint64_t *fdlist;
+	uint32_t *crclist;
+	int err = 0;
+
+	inbufs = REMOTE_SCALARS_INBUFS(sc);
+	outbufs = REMOTE_SCALARS_OUTBUFS(sc);
+	handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
+	list = fastrpc_invoke_buf_start(ctx->rpra, sc);
+	pages = fastrpc_phy_page_start(sc, list);
+	fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
+	crclist = (uint32_t *)(fdlist + FASTRPC_MAX_FDLIST);
+
+	for (i = inbufs; i < inbufs + outbufs; ++i) {
+		if (!ctx->maps[i]) {
+			if (!kernel) {
+				/*
+				 * copy_to_user() returns the number of bytes
+				 * left, not an errno.
+				 */
+				if (copy_to_user(
+					(void __user *)ctx->lpra[i].buf.pv,
+					(void *)rpra[i].buf.pv,
+					rpra[i].buf.len))
+					err = -EFAULT;
+			} else {
+				memcpy(ctx->lpra[i].buf.pv,
+				       (void *)rpra[i].buf.pv,
+				       rpra[i].buf.len);
+			}
+
+			if (err)
+				goto bail;
+		} else {
+			fastrpc_map_put(ctx->maps[i]);
+			ctx->maps[i] = NULL;
+		}
+	}
+
+	if (inbufs + outbufs + handles) {
+		for (i = 0; i < FASTRPC_MAX_FDLIST; i++) {
+			if (!fdlist[i])
+				break;
+			/* The lookup takes a reference; drop it again. */
+			if (!fastrpc_map_get(ctx->fl, (int)fdlist[i], 0,
+					     0, &mmap))
+				fastrpc_map_put(mmap);
+		}
+	}
+
+	if (ctx->crc && crclist) {
+		if (!kernel) {
+			if (copy_to_user((void __user *)ctx->crc, crclist,
+					 FASTRPC_MAX_CRCLIST * sizeof(uint32_t)))
+				err = -EFAULT;
+		} else {
+			memcpy(ctx->crc, crclist,
+			       FASTRPC_MAX_CRCLIST * sizeof(uint32_t));
+		}
+	}
+
+bail:
+	return err;
+}
+
+static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
+ struct fastrpc_invoke_ctx *ctx,
+ uint32_t kernel, uint32_t handle)
+{
+ struct fastrpc_channel_ctx *cctx;
+ struct fastrpc_user *fl = ctx->fl;
+ struct fastrpc_msg *msg = &ctx->msg;
+
+ cctx = fl->cctx;
+ msg->pid = fl->tgid;
+ msg->tid = current->pid;
+
+ if (kernel)
+ msg->pid = 0;
+
+ msg->invoke.header.ctx = ctx->ctxid | fl->pd;
+ msg->invoke.header.handle = handle;
+ msg->invoke.header.sc = ctx->sc;
+ msg->invoke.page.addr = ctx->buf ? ctx->buf->phys : 0;
+ msg->invoke.page.size = roundup(ctx->used_sz, PAGE_SIZE);
+
+ return rpmsg_send(cctx->rpdev->ept, (void *)msg, sizeof(*msg));
+}
+
+/*
+ * Run one remote invocation for @fl.
+ *
+ * Allocates an invoke context, marshals the arguments when the scalars
+ * describe any, sends the message and sleeps (interruptibly) until the
+ * context's completion is signalled; on a zero remote return value the
+ * results are copied back. The context is freed on every path once it has
+ * been allocated.
+ *
+ * Returns 0 on success, -EINVAL when the user has no session, or the
+ * first error reported by any of the steps (including the context's
+ * retval).
+ */
+static int fastrpc_internal_invoke(struct fastrpc_user *fl,
+				   uint32_t kernel,
+				   struct fastrpc_ioctl_invoke *inv)
+{
+	struct fastrpc_invoke_ctx *ctx;
+	int err;
+
+	if (!fl->sctx)
+		return -EINVAL;
+
+	ctx = fastrpc_context_alloc(fl, kernel, inv);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	if (REMOTE_SCALARS_LENGTH(ctx->sc)) {
+		err = fastrpc_get_args(kernel, ctx);
+		if (err)
+			goto out_free;
+	}
+
+	err = fastrpc_invoke_send(fl->sctx, ctx, kernel, inv->handle);
+	if (err)
+		goto out_free;
+
+	err = wait_for_completion_interruptible(&ctx->work);
+	if (err)
+		goto out_free;
+
+	err = ctx->retval;
+	if (err)
+		goto out_free;
+
+	err = fastrpc_put_args(ctx, kernel, inv->pra);
+
+out_free:
+	fastrpc_context_free(ctx);
+
+	return err;
+}
+/*
+ * Claim the first session of @cctx that is valid, not in use and whose
+ * secure flag equals @secure. The scan and the claim are done under
+ * cctx->lock.
+ *
+ * Returns the claimed session, or NULL when none is available.
+ */
+static struct fastrpc_session_ctx *fastrpc_session_alloc(
+					struct fastrpc_channel_ctx *cctx,
+					int secure)
+{
+	struct fastrpc_session_ctx *found = NULL;
+	int idx;
+
+	spin_lock(&cctx->lock);
+	for (idx = 0; idx < cctx->sesscount; idx++) {
+		if (cctx->session[idx].used)
+			continue;
+		if (!cctx->session[idx].valid)
+			continue;
+		if (cctx->session[idx].secure != secure)
+			continue;
+
+		cctx->session[idx].used = true;
+		found = &cctx->session[idx];
+		break;
+	}
+	spin_unlock(&cctx->lock);
+
+	return found;
+}
+
+static void fastrpc_session_free(struct fastrpc_channel_ctx *cctx,
+ struct fastrpc_session_ctx *session)
+{
+ spin_lock(&cctx->lock);
+ session->used = false;
+ spin_unlock(&cctx->lock);
+}
+
static const struct of_device_id fastrpc_match_table[] = {
{ .compatible = "qcom,fastrpc-compute-cb", },
{}
@@ -78,11 +773,26 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
{
struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
struct fastrpc_channel_ctx *cctx = cdev_to_cctx(inode->i_cdev);
+ struct fastrpc_invoke_ctx *ctx, *n;
+ struct fastrpc_map *map, *m;

spin_lock(&cctx->lock);
list_del(&fl->user);
spin_unlock(&cctx->lock);

+ if (fl->init_mem)
+ fastrpc_buf_free(fl->init_mem);
+
+ list_for_each_entry_safe(ctx, n, &fl->pending, node)
+ fastrpc_context_free(ctx);
+
+ list_for_each_entry_safe(map, m, &fl->maps, node)
+ fastrpc_map_put(map);
+
+ if (fl->sctx)
+ fastrpc_session_free(fl->cctx, fl->sctx);
+
+ mutex_destroy(&fl->mutex);
kfree(fl);
file->private_data = NULL;

@@ -116,9 +826,48 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
return 0;
}

+/*
+ * ioctl entry point of the fastrpc character device.
+ *
+ * A non-secure session is claimed on the first ioctl of a file if none
+ * is attached yet (-ENOENT when no session is free).
+ *
+ * FASTRPC_IOCTL_INVOKE copies a struct fastrpc_ioctl_invoke from user
+ * space and runs it through fastrpc_internal_invoke().
+ *
+ * Returns 0 on success, -EFAULT when the argument cannot be read,
+ * -ENOTTY for unknown commands, or the invoke error.
+ */
+static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
+	struct fastrpc_channel_ctx *cctx = fl->cctx;
+	char __user *argp = (char __user *)arg;
+	int err;
+
+	if (!fl->sctx) {
+		fl->sctx = fastrpc_session_alloc(cctx, 0);
+		if (!fl->sctx)
+			return -ENOENT;
+	}
+
+	switch (cmd) {
+	case FASTRPC_IOCTL_INVOKE: {
+		struct fastrpc_ioctl_invoke inv;
+
+		/*
+		 * copy_from_user() returns the number of bytes it could
+		 * NOT copy, not an errno; translate that to -EFAULT
+		 * rather than leaking a positive count to user space.
+		 * The whole structure is overwritten by the copy, so no
+		 * field needs pre-initialising.
+		 */
+		if (copy_from_user(&inv, argp, sizeof(inv))) {
+			err = -EFAULT;
+			goto bail;
+		}
+		err = fastrpc_internal_invoke(fl, 0, &inv);
+		if (err)
+			goto bail;
+		break;
+	}
+	default:
+		err = -ENOTTY;
+		pr_info("bad ioctl: %d\n", cmd);
+		break;
+	}
+bail:
+	return err;
+}
+
static const struct file_operations fastrpc_fops = {
.open = fastrpc_device_open,
.release = fastrpc_device_release,
+ .unlocked_ioctl = fastrpc_device_ioctl,
};

static int fastrpc_cb_probe(struct platform_device *pdev)
@@ -251,9 +1000,25 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
return err;
}

+/*
+ * Signal the completion of every invoke context queued on @user's
+ * pending list, holding user->lock for the walk.
+ */
+static void fastrpc_notify_users(struct fastrpc_user *user)
+{
+	struct fastrpc_invoke_ctx *pending, *next;
+
+	spin_lock(&user->lock);
+	list_for_each_entry_safe(pending, next, &user->pending, node) {
+		complete(&pending->work);
+	}
+	spin_unlock(&user->lock);
+}
+
static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
{
struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+ struct fastrpc_user *user, *n;
+
+ spin_lock(&cctx->lock);
+ list_for_each_entry_safe(user, n, &cctx->users, user)
+ fastrpc_notify_users(user);
+ spin_unlock(&cctx->lock);

device_del(&cctx->dev);
put_device(&cctx->dev);
@@ -264,6 +1029,31 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
int len, void *priv, u32 addr)
{
+	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+	struct fastrpc_invoke_rsp *rsp = data;
+	struct fastrpc_invoke_ctx *ctx;
+	unsigned long flags;
+	int ctxid;
+
+	/*
+	 * Reject a missing or truncated response before reading it.
+	 * The length is checked for negative values first because a
+	 * plain int-vs-sizeof comparison converts a negative len to a
+	 * huge unsigned value and would accept it.
+	 */
+	if (!rsp || len < 0 || (size_t)len < sizeof(*rsp)) {
+		dev_err(&rpdev->dev, "invalid response or context\n");
+		return -EINVAL;
+	}
+
+	/* The context ID lives in the FASTRPC_CTXID_MASK bits of rsp->ctx. */
+	ctxid = (uint32_t)((rsp->ctx & FASTRPC_CTXID_MASK) >> 4);
+
+	spin_lock_irqsave(&cctx->lock, flags);
+	ctx = idr_find(&cctx->ctx_idr, ctxid);
+	spin_unlock_irqrestore(&cctx->lock, flags);
+
+	if (!ctx) {
+		dev_err(&rpdev->dev, "No context ID matches response\n");
+		return -ENOENT;
+	}
+
+	/* Hand the remote return value to the waiter and wake it. */
+	ctx->retval = rsp->retval;
+	complete(&ctx->work);
+
return 0;
}

diff --git a/include/uapi/linux/fastrpc.h b/include/uapi/linux/fastrpc.h
new file mode 100644
index 000000000000..8fec66601337
--- /dev/null
+++ b/include/uapi/linux/fastrpc.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __QCOM_FASTRPC_H__
+#define __QCOM_FASTRPC_H__
+
+#include <linux/types.h>
+
+#define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_ioctl_invoke)
+
+#define remote_arg64_t union remote_arg64
+
+/*
+ * Fixed-width buffer descriptor. Uses the __u64 type supplied by
+ * <linux/types.h>, the only header this file includes.
+ */
+struct remote_buf64 {
+	__u64 pv;	/* buffer pointer, carried as a 64-bit integer */
+	__u64 len;	/* length of buffer */
+};
+
+/* Fixed-width DMA handle descriptor: an fd plus offset and length. */
+struct remote_dma_handle64 {
+	int fd;
+	__u32 offset;
+	__u32 len;
+};
+
+/* One fixed-width remote argument: a buffer, a DMA handle or a handle. */
+union remote_arg64 {
+	struct remote_buf64 buf;
+	struct remote_dma_handle64 dma;
+	__u32 h;
+};
+
+#define remote_arg_t union remote_arg
+
+struct remote_buf { /* native-width layout: pointer and size_t sizes follow the caller's ABI */
+ void *pv; /* buffer pointer */
+ size_t len; /* length of buffer */
+};
+
+/* DMA handle descriptor: an fd plus an offset. */
+struct remote_dma_handle {
+	int fd;
+	__u32 offset;
+};
+
+/* One remote argument: a buffer, a DMA handle or a remote handle. */
+union remote_arg {
+	struct remote_buf buf;		/* buffer info */
+	struct remote_dma_handle dma;
+	__u32 h;			/* remote handle */
+};
+
+/*
+ * Argument of FASTRPC_IOCTL_INVOKE. The scalar members use the __u32
+ * type from <linux/types.h>; the pointer members keep their native
+ * width, so the layout is unchanged.
+ */
+struct fastrpc_ioctl_invoke {
+	__u32 handle;		/* remote handle */
+	__u32 sc;		/* scalars describing the data */
+	remote_arg_t *pra;	/* remote arguments list */
+	int *fds;		/* fd list */
+	unsigned int *attrs;	/* attribute list */
+	unsigned int *crc;	/* presumably a CRC list -- TODO confirm */
+};
+
+#endif /* __QCOM_FASTRPC_H__ */
--
2.19.2


2018-11-30 10:49:25

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 2/6] char: fastrpc: Add Qualcomm fastrpc basic driver model

This patch adds basic driver model for qualcomm fastrpc.
Each DSP rpmsg channel is represented as fastrpc channel context and
is exposed as a character driver for userspace interface.
Each compute context bank is represented as fastrpc-session-context,
which are dynamically managed by the channel context char device.

Signed-off-by: Srinivas Kandagatla <[email protected]>
---
drivers/char/Kconfig | 10 ++
drivers/char/Makefile | 1 +
drivers/char/fastrpc.c | 337 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 348 insertions(+)
create mode 100644 drivers/char/fastrpc.c

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 9d03b2ff5df6..75fd274c67df 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -552,6 +552,16 @@ config ADI
and SSM (Silicon Secured Memory). Intended consumers of this
driver include crash and makedumpfile.

+config QCOM_FASTRPC
+ tristate "Qualcomm FastRPC"
+ depends on ARCH_QCOM || COMPILE_TEST
+ depends on RPMSG
+ help
+ Provides a communication mechanism that allows for clients to
+ make remote method invocations across processor boundary to
+ applications DSP processor. Say M if you want to enable this
+ module.
+
endmenu

config RANDOM_TRUST_CPU
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index b8d42b4e979b..30ec9187350e 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -58,3 +58,4 @@ js-rtc-y = rtc.o
obj-$(CONFIG_XILLYBUS) += xillybus/
obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o
obj-$(CONFIG_ADI) += adi.o
+obj-$(CONFIG_QCOM_FASTRPC) += fastrpc.o
diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
new file mode 100644
index 000000000000..97d8062eb3e1
--- /dev/null
+++ b/drivers/char/fastrpc.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/rpmsg.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#define ADSP_DOMAIN_ID (0)
+#define MDSP_DOMAIN_ID (1)
+#define SDSP_DOMAIN_ID (2)
+#define CDSP_DOMAIN_ID (3)
+#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/
+#define FASTRPC_MAX_SESSIONS 9 /*8 compute, 1 cpz*/
+#define FASTRPC_CTX_MAX (256)
+#define FASTRPC_CTXID_MASK (0xFF0)
+#define FASTRPC_DEVICE_NAME "fastrpc"
+
+#define cdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, cdev)
+
+static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
+ "sdsp", "cdsp"};
+static dev_t fastrpc_major;
+static struct class *fastrpc_class;
+
+struct fastrpc_session_ctx { /* one session slot of a channel, filled in by fastrpc_cb_probe() */
+ struct device *dev; /* compute-cb platform device backing this slot */
+ int sid; /* session ID read from the DT "reg" property */
+ bool used; /* initialised to false by probe */
+ bool valid; /* set by probe, cleared by fastrpc_cb_remove() */
+ bool secure; /* value of the DT "secured" boolean property */
+};
+
+struct fastrpc_channel_ctx { /* per-rpmsg-channel state, allocated in fastrpc_rpmsg_probe() */
+ int domain_id; /* DSP domain from the DT "reg" property (at most CDSP_DOMAIN_ID) */
+ int sesscount; /* number of session[] slots filled by fastrpc_cb_probe() */
+ struct rpmsg_device *rpdev; /* rpmsg device this channel was probed on */
+ struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS];
+ spinlock_t lock; /* held around session[], sesscount and users updates */
+ struct idr ctx_idr; /* initialised with idr_init() in probe */
+ struct list_head users; /* open files, linked through fastrpc_user.user */
+ struct cdev cdev; /* character device using fastrpc_fops */
+ struct device dev; /* class device; its parent is the rpmsg device */
+};
+
+struct fastrpc_user { /* per-open-file state, allocated in fastrpc_device_open() */
+ struct list_head user; /* link in fastrpc_channel_ctx.users */
+ struct list_head maps; /* initialised empty by open */
+ struct list_head pending; /* initialised empty by open */
+
+ struct fastrpc_channel_ctx *cctx; /* channel whose cdev was opened */
+ struct fastrpc_session_ctx *sctx; /* left NULL by open (kzalloc) */
+
+ int tgid; /* current->tgid at open time */
+ int pd; /* left 0 by open (kzalloc) */
+ /* Lock for lists */
+ spinlock_t lock;
+ /* lock for allocations */
+ struct mutex mutex;
+ struct device *dev; /* &cctx->rpdev->dev, set by open */
+};
+
+static const struct of_device_id fastrpc_match_table[] = { /* DT match table of fastrpc_cb_driver */
+ { .compatible = "qcom,fastrpc-compute-cb", },
+ {} /* sentinel */
+};
+
+static int fastrpc_device_release(struct inode *inode, struct file *file)
+{ /* release(): unlink the per-file state from its channel and free it; always returns 0 */
+ struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
+ struct fastrpc_channel_ctx *cctx = cdev_to_cctx(inode->i_cdev);
+
+ spin_lock(&cctx->lock); /* cctx->lock guards the users list */
+ list_del(&fl->user);
+ spin_unlock(&cctx->lock);
+
+ kfree(fl);
+ file->private_data = NULL; /* do not leave a dangling pointer behind */
+
+ return 0;
+}
+
+static int fastrpc_device_open(struct inode *inode, struct file *filp)
+{ /* open(): allocate per-file state and add it to the channel's users list; 0 or -ENOMEM */
+ struct fastrpc_channel_ctx *cctx = cdev_to_cctx(inode->i_cdev);
+ struct fastrpc_user *fl = NULL;
+
+ fl = kzalloc(sizeof(*fl), GFP_KERNEL); /* zeroed: sctx and pd start out as NULL/0 */
+ if (!fl)
+ return -ENOMEM;
+
+ filp->private_data = fl;
+
+ spin_lock_init(&fl->lock);
+ mutex_init(&fl->mutex);
+ INIT_LIST_HEAD(&fl->pending);
+ INIT_LIST_HEAD(&fl->maps);
+ INIT_LIST_HEAD(&fl->user);
+
+ fl->tgid = current->tgid; /* thread-group ID of the opener */
+ fl->cctx = cctx;
+ fl->dev = &cctx->rpdev->dev;
+ spin_lock(&cctx->lock); /* cctx->lock guards the users list */
+ list_add_tail(&fl->user, &cctx->users);
+ spin_unlock(&cctx->lock);
+
+ return 0;
+}
+
+static const struct file_operations fastrpc_fops = { /* passed to cdev_init() in fastrpc_rpmsg_probe() */
+ .open = fastrpc_device_open, /* allocates the per-file fastrpc_user */
+ .release = fastrpc_device_release, /* unlinks and frees it */
+};
+
+/*
+ * Probe one "qcom,fastrpc-compute-cb" child of a fastrpc channel.
+ *
+ * Fills the next free slot of the parent channel's session[] array
+ * from the node's "reg" (session ID) and "secured" properties. When
+ * "nsessions" is greater than one, the slot is duplicated so that
+ * nsessions slots in total describe this context bank.
+ *
+ * session[] holds FASTRPC_MAX_SESSIONS entries; every index is checked
+ * against that bound before it is used, both for the first slot and
+ * for each duplicate.
+ *
+ * Returns 0 on success, -EINVAL when the parent has no channel
+ * context, or -ENOSPC when all session slots are already taken.
+ */
+static int fastrpc_cb_probe(struct platform_device *pdev)
+{
+	struct fastrpc_channel_ctx *cctx;
+	struct fastrpc_session_ctx *sess;
+	struct device *dev = &pdev->dev;
+	u32 sessions = 0, sid = 0;
+	u32 i;
+
+	cctx = dev_get_drvdata(dev->parent);
+	if (!cctx)
+		return -EINVAL;
+
+	of_property_read_u32(dev->of_node, "nsessions", &sessions);
+
+	spin_lock(&cctx->lock);
+	if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) {
+		spin_unlock(&cctx->lock);
+		dev_err(dev, "too many sessions\n");
+		return -ENOSPC;
+	}
+
+	sess = &cctx->session[cctx->sesscount++];
+	sess->used = false;
+	sess->valid = true;
+	sess->dev = dev;
+	dev_set_drvdata(dev, sess);
+	sess->secure = of_property_read_bool(dev->of_node, "secured");
+
+	/* Read into a u32 local: of_property_read_u32() takes a u32 *. */
+	if (of_property_read_u32(dev->of_node, "reg", &sid))
+		dev_err(dev, "FastRPC Session ID not specified in DT\n");
+	sess->sid = sid;
+
+	/* Slot 0 of this bank is @sess itself; add the remaining copies. */
+	for (i = 1; i < sessions; i++) {
+		if (cctx->sesscount >= FASTRPC_MAX_SESSIONS)
+			break;
+		memcpy(&cctx->session[cctx->sesscount++], sess,
+		       sizeof(*sess));
+	}
+	spin_unlock(&cctx->lock);
+	dma_set_mask(dev, DMA_BIT_MASK(32));
+
+	return 0;
+}
+
+/*
+ * Remove one compute context bank: invalidate every session slot that
+ * carries this device's session ID and drop it from the session count.
+ *
+ * The scan starts at slot 0 so that the first probed bank is
+ * invalidated too, and only slots that are currently valid are
+ * considered, so zero-initialised unused slots cannot match a session
+ * ID of 0 and skew sesscount.
+ *
+ * Always returns 0.
+ */
+static int fastrpc_cb_remove(struct platform_device *pdev)
+{
+	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(pdev->dev.parent);
+	struct fastrpc_session_ctx *sess = dev_get_drvdata(&pdev->dev);
+	int i;
+
+	spin_lock(&cctx->lock);
+	for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) {
+		if (cctx->session[i].valid &&
+		    cctx->session[i].sid == sess->sid) {
+			cctx->session[i].valid = false;
+			cctx->sesscount--;
+		}
+	}
+	spin_unlock(&cctx->lock);
+
+	return 0;
+}
+
+/*
+ * Platform driver for the compute context bank child nodes.
+ * .owner is not set here: platform_driver_register() fills it in.
+ */
+static struct platform_driver fastrpc_cb_driver = {
+	.probe = fastrpc_cb_probe,
+	.remove = fastrpc_cb_remove,
+	.driver = {
+		.name = "fastrpc",
+		.of_match_table = fastrpc_match_table,
+		.suppress_bind_attrs = true,
+	},
+};
+
+/*
+ * Release callback of the channel's class device: deletes the cdev
+ * embedded in the channel context stored as the parent's drvdata.
+ */
+static void fastrpc_cdev_release_device(struct device *dev)
+{
+	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(dev->parent);
+	struct cdev *cdev = &cctx->cdev;
+
+	cdev_del(cdev);
+}
+
+/*
+ * Probe a "qcom,fastrpc" rpmsg channel.
+ *
+ * Reads the DSP domain from the DT "reg" property, sets up the channel
+ * context and registers the "fastrpc-<domain>" character device, then
+ * populates the compute context bank child nodes.
+ *
+ * All channel state (lists, lock, idr, rpdev, drvdata) is initialised
+ * BEFORE cdev_device_add(): once the cdev is live, open() may run and
+ * reads cctx->rpdev and the users list, and the device release
+ * callback looks the context up through the parent's drvdata.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
+{
+	struct device *rdev = &rpdev->dev;
+	struct fastrpc_channel_ctx *data;
+	struct device *dev;
+	u32 domain_id;
+	int err;
+
+	data = devm_kzalloc(rdev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	err = of_property_read_u32(rdev->of_node, "reg", &domain_id);
+	if (err) {
+		dev_err(rdev, "FastRPC Domain ID not specified in DT\n");
+		return err;
+	}
+
+	/* Unsigned, so one comparison bounds the domains[] index. */
+	if (domain_id > CDSP_DOMAIN_ID) {
+		dev_err(rdev, "FastRPC Invalid Domain ID %u\n", domain_id);
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&data->users);
+	spin_lock_init(&data->lock);
+	idr_init(&data->ctx_idr);
+	data->domain_id = domain_id;
+	data->rpdev = rpdev;
+	dev_set_drvdata(rdev, data);
+	dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32));
+
+	dev = &data->dev;
+	device_initialize(dev);
+	dev->parent = rdev;
+	dev->class = fastrpc_class;
+	dev->devt = MKDEV(MAJOR(fastrpc_major), domain_id);
+	dev->id = domain_id;
+	dev->release = fastrpc_cdev_release_device;
+	dev_set_name(dev, "fastrpc-%s", domains[domain_id]);
+
+	cdev_init(&data->cdev, &fastrpc_fops);
+	data->cdev.owner = THIS_MODULE;
+
+	err = cdev_device_add(&data->cdev, dev);
+	if (err)
+		goto cdev_err;
+
+	return of_platform_populate(rdev->of_node, NULL, NULL, rdev);
+
+cdev_err:
+	put_device(dev);
+	return err;
+}
+
+/*
+ * Tear down a fastrpc channel: unregister the class device, drop the
+ * reference taken by device_initialize() and remove the compute
+ * context bank children.
+ *
+ * The channel context is NOT freed here: it was allocated with
+ * devm_kzalloc() in fastrpc_rpmsg_probe() and is released by devres
+ * when the rpmsg device is unbound, so a kfree() would free it twice.
+ */
+static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
+{
+	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+
+	device_del(&cctx->dev);
+	put_device(&cctx->dev);
+	of_platform_depopulate(&rpdev->dev);
+}
+
+static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
+ int len, void *priv, u32 addr)
+{ /* rpmsg receive callback; this version ignores every argument */
+ return 0; /* incoming messages are accepted and discarded */
+}
+
+static const struct of_device_id fastrpc_rpmsg_of_match[] = { /* DT match table of fastrpc_driver */
+ { .compatible = "qcom,fastrpc" },
+ { }, /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, fastrpc_rpmsg_of_match);
+
+static struct rpmsg_driver fastrpc_driver = { /* registered in fastrpc_init() */
+ .probe = fastrpc_rpmsg_probe, /* creates the channel context and char device */
+ .remove = fastrpc_rpmsg_remove, /* unregisters the device and its children */
+ .callback = fastrpc_rpmsg_callback, /* receive path */
+ .drv = {
+ .name = "qcom,msm_fastrpc_rpmsg",
+ .of_match_table = fastrpc_rpmsg_of_match,
+ },
+};
+
+/*
+ * Module init: reserve the char device region, create the "fastrpc"
+ * class and register the compute-cb platform driver and the rpmsg
+ * driver, unwinding in reverse order on failure.
+ *
+ * Marked __init as it only runs at module load.
+ */
+static int __init fastrpc_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&fastrpc_major, 0, FASTRPC_DEV_MAX,
+				  FASTRPC_DEVICE_NAME);
+	if (ret < 0) {
+		pr_err("fastrpc: failed to allocate char dev region\n");
+		return ret;
+	}
+
+	fastrpc_class = class_create(THIS_MODULE, "fastrpc");
+	if (IS_ERR(fastrpc_class)) {
+		/* The class created here is "fastrpc", not an rpmsg class. */
+		pr_err("fastrpc: failed to create class\n");
+		ret = PTR_ERR(fastrpc_class);
+		goto err_class;
+	}
+
+	ret = platform_driver_register(&fastrpc_cb_driver);
+	if (ret < 0) {
+		pr_err("fastrpc: failed to register cb driver\n");
+		goto err_pdev;
+	}
+
+	ret = register_rpmsg_driver(&fastrpc_driver);
+	if (ret < 0) {
+		pr_err("fastrpc: failed to register rpmsg driver\n");
+		goto err_rpdrv;
+	}
+
+	return 0;
+err_rpdrv:
+	platform_driver_unregister(&fastrpc_cb_driver);
+err_pdev:
+	class_destroy(fastrpc_class);
+err_class:
+	unregister_chrdev_region(fastrpc_major, FASTRPC_DEV_MAX);
+	return ret;
+}
+module_init(fastrpc_init);
+
+/*
+ * Module exit: unregister both drivers, destroy the class and release
+ * the char device region. Marked __exit as it only runs at unload.
+ */
+static void __exit fastrpc_exit(void)
+{
+	platform_driver_unregister(&fastrpc_cb_driver);
+	unregister_rpmsg_driver(&fastrpc_driver);
+	class_destroy(fastrpc_class);
+	unregister_chrdev_region(fastrpc_major, FASTRPC_DEV_MAX);
+}
+module_exit(fastrpc_exit);
+
+MODULE_ALIAS("fastrpc:fastrpc");
+MODULE_LICENSE("GPL v2");
--
2.19.2


2018-11-30 10:49:37

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 4/6] char: fastrpc: Add support for create remote init process

This patch adds support to create or attach remote shell process.
The shell process called fastrpc_shell_0 is usually loaded on the DSP
when a user process is spawned.

Most of the work is derived from various downstream Qualcomm kernels.
Credits to various Qualcomm authors who have contributed to this code.
Specially Tharun Kumar Merugu <[email protected]>

Signed-off-by: Srinivas Kandagatla <[email protected]>
---
drivers/char/fastrpc.c | 172 +++++++++++++++++++++++++++++++++++
include/uapi/linux/fastrpc.h | 18 ++++
2 files changed, 190 insertions(+)

diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
index 5bb224adc24f..3630e883d3f4 100644
--- a/drivers/char/fastrpc.c
+++ b/drivers/char/fastrpc.c
@@ -30,6 +30,8 @@
#define FASTRPC_PHYS(p) (p & 0xffffffff)
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_CTXID_MASK (0xFF0)
+#define INIT_FILELEN_MAX (2*1024*1024)
+#define INIT_MEMLEN_MAX (8*1024*1024)
#define FASTRPC_DEVICE_NAME "fastrpc"

/* Retrives number of input buffers from the scalars parameter */
@@ -59,6 +61,14 @@

#define FASTRPC_SCALARS(method, in, out) \
FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0)
+
+/* Remote Method id table */
+#define FASTRPC_RMID_INIT_ATTACH 0
+#define FASTRPC_RMID_INIT_RELEASE 1
+#define FASTRPC_RMID_INIT_CREATE 6
+#define FASTRPC_RMID_INIT_CREATE_ATTR 7
+#define FASTRPC_RMID_INIT_CREATE_STATIC 8
+
#define cdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, cdev)

static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
@@ -735,6 +745,130 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,

return err;
}
+
+/*
+ * Attach to, or create, the remote process for @fl.
+ *
+ * FASTRPC_INIT_ATTACH sends the caller's tgid with the INIT_ATTACH
+ * method and sets fl->pd to 0.
+ *
+ * FASTRPC_INIT_CREATE sets fl->pd to 1, maps the file described by
+ * @init (when filelen is non-zero), allocates the init memory buffer
+ * and sends the INIT_CREATE method, or INIT_CREATE_ATTR with two extra
+ * arguments when init->attrs is non-zero. Memory donated from user
+ * space (init->mem) is rejected with -EINVAL.
+ *
+ * The invoke descriptor lives on the stack, as it does in
+ * fastrpc_release_current_dsp_process(); it is small and only used for
+ * the duration of the call.
+ *
+ * NOTE(review): a second CREATE on the same file overwrites
+ * fl->init_mem without freeing the previous buffer.
+ *
+ * Returns 0 on success, -ENOTTY for any other flags value, or the
+ * error of the failing step.
+ */
+static int fastrpc_init_process(struct fastrpc_user *fl,
+				struct fastrpc_ioctl_init *init)
+{
+	struct fastrpc_ioctl_invoke ioctl = { .handle = 1 };
+	struct fastrpc_phy_page pages[1];
+	struct fastrpc_map *file = NULL;
+	struct fastrpc_buf *imem = NULL;
+	int err = 0;
+
+	if (init->flags == FASTRPC_INIT_ATTACH) {
+		remote_arg_t ra[1];
+		int tgid = fl->tgid;
+
+		ra[0].buf.pv = (void *)&tgid;
+		ra[0].buf.len = sizeof(tgid);
+		ioctl.sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
+		ioctl.pra = ra;
+		fl->pd = 0;
+
+		err = fastrpc_internal_invoke(fl, 1, &ioctl);
+	} else if (init->flags == FASTRPC_INIT_CREATE) {
+		int memlen;
+		remote_arg_t ra[6];
+		int fds[6];
+		struct {
+			int pgid;
+			unsigned int namelen;
+			unsigned int filelen;
+			unsigned int pageslen;
+			int attrs;
+			int siglen;
+		} inbuf;
+
+		inbuf.pgid = fl->tgid;
+		inbuf.namelen = strlen(current->comm) + 1;
+		inbuf.filelen = init->filelen;
+		inbuf.pageslen = 1;
+		fl->pd = 1;
+
+		if (init->filelen) {
+			err = fastrpc_map_create(fl, init->filefd,
+						 init->file, init->filelen,
+						 &file);
+			if (err)
+				goto bail;
+		}
+
+		if (init->mem) {
+			err = -EINVAL;
+			pr_err("adsprpc: %s: %s: ERROR: donated memory allocated in userspace\n",
+			       current->comm, __func__);
+			goto bail;
+		}
+
+		/* At least INIT_FILELEN_MAX, rounded up to a 1 MiB multiple. */
+		memlen = ALIGN(max(INIT_FILELEN_MAX, (int)init->filelen * 4),
+			       1024 * 1024);
+		err = fastrpc_buf_alloc(fl, fl->sctx->dev, memlen, &imem);
+		if (err)
+			goto bail;
+
+		/* Ownership moves to @fl; it is freed on device release. */
+		fl->init_mem = imem;
+
+		ra[0].buf.pv = (void *)&inbuf;
+		ra[0].buf.len = sizeof(inbuf);
+		fds[0] = 0;
+
+		ra[1].buf.pv = (void *)current->comm;
+		ra[1].buf.len = inbuf.namelen;
+		fds[1] = 0;
+
+		ra[2].buf.pv = (void *)init->file;
+		ra[2].buf.len = inbuf.filelen;
+		fds[2] = init->filefd;
+
+		pages[0].addr = imem->phys;
+		pages[0].size = imem->size;
+
+		ra[3].buf.pv = (void *)pages;
+		ra[3].buf.len = 1 * sizeof(*pages);
+		fds[3] = 0;
+
+		inbuf.attrs = init->attrs;
+		ra[4].buf.pv = (void *)&(inbuf.attrs);
+		ra[4].buf.len = sizeof(inbuf.attrs);
+		fds[4] = 0;
+
+		inbuf.siglen = init->siglen;
+		ra[5].buf.pv = (void *)&(inbuf.siglen);
+		ra[5].buf.len = sizeof(inbuf.siglen);
+		fds[5] = 0;
+
+		/* Arguments 4 and 5 are only sent with the ATTR variant. */
+		ioctl.sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0);
+		if (init->attrs)
+			ioctl.sc = FASTRPC_SCALARS(
+				FASTRPC_RMID_INIT_CREATE_ATTR, 6, 0);
+		ioctl.pra = ra;
+		ioctl.fds = fds;
+		err = fastrpc_internal_invoke(fl, 1, &ioctl);
+	} else {
+		err = -ENOTTY;
+	}
+bail:
+	if (file)
+		fastrpc_map_put(file);
+
+	return err;
+}
+
static struct fastrpc_session_ctx *fastrpc_session_alloc(
struct fastrpc_channel_ctx *cctx,
int secure)
@@ -769,6 +903,25 @@ static const struct of_device_id fastrpc_match_table[] = {
{}
};

+/*
+ * Send the INIT_RELEASE method for @fl's tgid as a kernel-originated
+ * invocation on handle 1 and return the invoke result.
+ */
+static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl)
+{
+	int pid = fl->tgid;
+	remote_arg_t args[1];
+	struct fastrpc_ioctl_invoke req = {
+		.handle = 1,
+		.sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0),
+		.pra = args,
+		.fds = NULL,
+		.attrs = NULL,
+		.crc = NULL,
+	};
+
+	args[0].buf.pv = (void *)&pid;
+	args[0].buf.len = sizeof(pid);
+
+	return fastrpc_internal_invoke(fl, 1, &req);
+}
+
static int fastrpc_device_release(struct inode *inode, struct file *file)
{
struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
@@ -776,6 +929,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
struct fastrpc_invoke_ctx *ctx, *n;
struct fastrpc_map *map, *m;

+ fastrpc_release_current_dsp_process(fl);
+
spin_lock(&cctx->lock);
list_del(&fl->user);
spin_unlock(&cctx->lock);
@@ -855,6 +1010,23 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
goto bail;
break;
}
+ case FASTRPC_IOCTL_INIT: {
+ struct fastrpc_ioctl_init init;
+
+ init.attrs = 0;
+ init.siglen = 0;
+ err = copy_from_user(&init, argp, sizeof(init));
+ if (err)
+ goto bail;
+ if (init.filelen > INIT_FILELEN_MAX)
+ goto bail;
+ if (init.memlen > INIT_MEMLEN_MAX)
+ goto bail;
+ err = fastrpc_init_process(fl, &init);
+ if (err)
+ goto bail;
+ }
+ break;
default:
err = -ENOTTY;
pr_info("bad ioctl: %d\n", cmd);
diff --git a/include/uapi/linux/fastrpc.h b/include/uapi/linux/fastrpc.h
index 8fec66601337..6b596fc7ddf3 100644
--- a/include/uapi/linux/fastrpc.h
+++ b/include/uapi/linux/fastrpc.h
@@ -6,6 +6,12 @@
#include <linux/types.h>

#define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_ioctl_invoke)
+#define FASTRPC_IOCTL_INIT _IOWR('R', 4, struct fastrpc_ioctl_init)
+
+/* INIT a new process or attach to guestos */
+#define FASTRPC_INIT_ATTACH 0
+#define FASTRPC_INIT_CREATE 1
+#define FASTRPC_INIT_CREATE_STATIC 2

#define remote_arg64_t union remote_arg64

@@ -53,4 +59,16 @@ struct fastrpc_ioctl_invoke {
unsigned int *crc;
};

+struct fastrpc_ioctl_init { /* argument of FASTRPC_IOCTL_INIT */
+ uint32_t flags; /* one of FASTRPC_INIT_* macros */
+ uintptr_t file; /* pointer to elf file */
+ uint32_t filelen; /* elf file length */
+ int32_t filefd; /* ION fd for the file */
+ uintptr_t mem; /* mem for the PD */
+ uint32_t memlen; /* mem length */
+ int32_t memfd; /* fd for the mem */
+ int attrs; /* non-zero selects FASTRPC_RMID_INIT_CREATE_ATTR; value is passed to the DSP */
+ unsigned int siglen; /* passed to the DSP with the CREATE_ATTR arguments */
+};
+
#endif /* __QCOM_FASTRPC_H__ */
--
2.19.2


2018-11-30 10:50:21

by Srinivas Kandagatla

[permalink] [raw]
Subject: [RFC PATCH 1/6] char: dt-bindings: Add Qualcomm Fastrpc bindings

The FastRPC driver implements an IPC (Inter-Processor Communication)
mechanism that allows for clients to transparently make remote method
invocations across DSP and APPS boundaries. This enables developers
to offload tasks to the DSP and free up the application processor for
other tasks.

Signed-off-by: Srinivas Kandagatla <[email protected]>
---
.../devicetree/bindings/char/qcom,fastrpc.txt | 73 +++++++++++++++++++
1 file changed, 73 insertions(+)
create mode 100644 Documentation/devicetree/bindings/char/qcom,fastrpc.txt

diff --git a/Documentation/devicetree/bindings/char/qcom,fastrpc.txt b/Documentation/devicetree/bindings/char/qcom,fastrpc.txt
new file mode 100644
index 000000000000..d5ff548960f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/char/qcom,fastrpc.txt
@@ -0,0 +1,73 @@
+Qualcomm Technologies, Inc. FastRPC Driver
+
+The FastRPC driver implements an IPC (Inter-Processor Communication)
+mechanism that allows for clients to transparently make remote method
+invocations across DSP and APPS boundaries. This enables developers
+to offload tasks to the DSP and free up the application processor for
+other tasks.
+
+- compatible:
+ Usage: required
+ Value type: <stringlist>
+ Definition: must be "qcom,fastrpc"
+
+- reg
+ Usage: required
+ Value type: <u32>
+ Definition: DSP Processor ID.
+ Possible values are :
+ 0 - ADSP
+ 1 - MDSP
+ 2 - SDSP
+ 3 - CDSP
+
+= COMPUTE BANKS
+Each subnode of the Fastrpc node represents compute context banks available
+on the dsp. The name of the nodes are not important. The properties of these
+nodes are defined by the individual bindings for the specific service
+- All Compute context banks MUST contain the following property:
+
+- compatible:
+ Usage: required
+ Value type: <stringlist>
+ Definition: must be "qcom,fastrpc-compute-cb"
+
+- reg
+ Usage: required
+ Value type: <u32>
+ Definition: Context Bank ID.
+
+- secured:
+ Usage: Optional
+ Value type: <boolean>
+ Definition: Indicating context bank is secured
+
+- nsessions:
+ Usage: Optional
+ Value type: <u32>
+ Definition: A value indicating how many sessions can share this
+ context bank. Defaults to 'One' when this property
+ is not specified.
+
+Example:
+
+fastrpc {
+ compatible = "qcom,fastrpc";
+ qcom,smd-channels = "fastrpcsmd-apps-dsp";
+ reg = <0>; /* DOMAIN_ADSP */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cb@1 {
+ compatible = "qcom,fastrpc-compute-cb";
+ reg = <1>;
+ iommus = <&lpass_q6_smmu 1>;
+ };
+
+ cb@2 {
+ compatible = "qcom,fastrpc-compute-cb";
+ reg = <2>;
+ iommus = <&lpass_q6_smmu 2>;
+ };
+ ...
+};
--
2.19.2


2018-11-30 12:59:59

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 6/6] char: fastrpc: Add support for compat ioctls

On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
<[email protected]> wrote:
>
> From: Thierry Escande <[email protected]>
>
> This patch adds support for compat ioctl from 32 bits userland to
> Qualcomm fastrpc driver.
>
> Supported ioctls in this change are INIT, INVOKE, and ALLOC/FREE_DMA.
>
> Most of the work is derived from various downstream Qualcomm kernels.
> Credits to various Qualcomm authors who have contributed to this code.
> Specially Tharun Kumar Merugu <[email protected]>
>
> Signed-off-by: Thierry Escande <[email protected]>
> Signed-off-by: Srinivas Kandagatla <[email protected]>

Can't you just define the native ioctls so that you don't need this?

Arnd

2018-11-30 13:23:24

by Thierry Escande

[permalink] [raw]
Subject: Re: [RFC PATCH 6/6] char: fastrpc: Add support for compat ioctls

Hi Arnd,

On 30/11/2018 13:58, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> <[email protected]> wrote:
>>
>> From: Thierry Escande <[email protected]>
>>
>> This patch adds support for compat ioctl from 32 bits userland to
>> Qualcomm fastrpc driver.
>>
>> Supported ioctls in this change are INIT, INVOKE, and ALLOC/FREE_DMA.
>>
>> Most of the work is derived from various downstream Qualcomm kernels.
>> Credits to various Qualcomm authors who have contributed to this code.
>> Specially Tharun Kumar Merugu <[email protected]>
>>
>> Signed-off-by: Thierry Escande <[email protected]>
>> Signed-off-by: Srinivas Kandagatla <[email protected]>
>
> Can't you just define the native ioctls so that you don't need this.

There are long-established structures that are passed as arguments to
these ioctls, and their sizes differ between 32- and 64-bit userlands, as
do the ioctl command values.

Unless I'm missing something here this also has the advantage not to be
compiled if CONFIG_COMPAT is not set.

Regards,
Thierry

2018-11-30 13:27:33

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 4/6] char: fastrpc: Add support for create remote init process

On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
<[email protected]> wrote:

> +
> +static int fastrpc_init_process(struct fastrpc_user *fl,
> + struct fastrpc_ioctl_init *init)
> +{
> + struct fastrpc_ioctl_invoke *ioctl;
> + struct fastrpc_phy_page pages[1];
> + struct fastrpc_map *file = NULL, *mem = NULL;
> + struct fastrpc_buf *imem = NULL;
> + int err = 0;
> +
> + ioctl = kzalloc(sizeof(*ioctl), GFP_KERNEL);
> + if (!ioctl)
> + return -ENOMEM;
> +
> + if (init->flags == FASTRPC_INIT_ATTACH) {
> + remote_arg_t ra[1];
> + int tgid = fl->tgid;
> +
> + ra[0].buf.pv = (void *)&tgid;
> + ra[0].buf.len = sizeof(tgid);
> + ioctl->handle = 1;
> + ioctl->sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
> + ioctl->pra = ra;
> + fl->pd = 0;
> +
> + err = fastrpc_internal_invoke(fl, 1, ioctl);
> + if (err)
> + goto bail;

It doesn't seem right to me to dynamically allocate an 'ioctl' data structure
from kernel context and pass that down to another function. Maybe eliminate
that structure and change fastrpc_internal_invoke to take the individual
arguments here instead of a pointer?

> + } else if (init->flags == FASTRPC_INIT_CREATE) {

How about splitting each flags== case into a separate function?

> diff --git a/include/uapi/linux/fastrpc.h b/include/uapi/linux/fastrpc.h
> index 8fec66601337..6b596fc7ddf3 100644
> --- a/include/uapi/linux/fastrpc.h
> +++ b/include/uapi/linux/fastrpc.h
> @@ -6,6 +6,12 @@
> #include <linux/types.h>
>
> #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_ioctl_invoke)
> +#define FASTRPC_IOCTL_INIT _IOWR('R', 4, struct fastrpc_ioctl_init)
> +
> +/* INIT a new process or attach to guestos */
> +#define FASTRPC_INIT_ATTACH 0
> +#define FASTRPC_INIT_CREATE 1
> +#define FASTRPC_INIT_CREATE_STATIC 2

Maybe use three command codes here, and remove the 'flags' member?

> @@ -53,4 +59,16 @@ struct fastrpc_ioctl_invoke {
> unsigned int *crc;
> };
>
> +struct fastrpc_ioctl_init {
> + uint32_t flags; /* one of FASTRPC_INIT_* macros */
> + uintptr_t file; /* pointer to elf file */
> + uint32_t filelen; /* elf file length */
> + int32_t filefd; /* ION fd for the file */

What does this have to do with ION? The driver seems to otherwise
just use the generic dma_buf interfaces.

> + uintptr_t mem; /* mem for the PD */
> + uint32_t memlen; /* mem length */
> + int32_t memfd; /* fd for the mem */
> + int attrs;
> + unsigned int siglen;
> +};

This structure is again not suitable for ioctls. Please use fixed-length
members and no holes in the structure.

Arnd

2018-11-30 13:37:04

by Srinivas Kandagatla

[permalink] [raw]
Subject: Re: [RFC PATCH 4/6] char: fastrpc: Add support for create remote init process

Thanks Arnd for the review comments,

On 30/11/18 13:26, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> <[email protected]> wrote:
>
>> +
>> +static int fastrpc_init_process(struct fastrpc_user *fl,
>> + struct fastrpc_ioctl_init *init)
>> +{
>> + struct fastrpc_ioctl_invoke *ioctl;
>> + struct fastrpc_phy_page pages[1];
>> + struct fastrpc_map *file = NULL, *mem = NULL;
>> + struct fastrpc_buf *imem = NULL;
>> + int err = 0;
>> +
>> + ioctl = kzalloc(sizeof(*ioctl), GFP_KERNEL);
>> + if (!ioctl)
>> + return -ENOMEM;
>> +
>> + if (init->flags == FASTRPC_INIT_ATTACH) {
>> + remote_arg_t ra[1];
>> + int tgid = fl->tgid;
>> +
>> + ra[0].buf.pv = (void *)&tgid;
>> + ra[0].buf.len = sizeof(tgid);
>> + ioctl->handle = 1;
>> + ioctl->sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
>> + ioctl->pra = ra;
>> + fl->pd = 0;
>> +
>> + err = fastrpc_internal_invoke(fl, 1, ioctl);
>> + if (err)
>> + goto bail;
>
> It doesn't seem right to me to dynamically allocate an 'ioctl' data structure
> from kernel context and pass that down to another function. Maybe eliminate
> that structure and change fastrpc_internal_invoke to take the individual
> arguments here instead of a pointer?
Yes, I totally agree with you, Will rework this part as suggested.
>
>> + } else if (init->flags == FASTRPC_INIT_CREATE) {
>
> How about splitting each flags== case into a separate function?

Once I move this to a command code, I will make this a separate function.
>
>> diff --git a/include/uapi/linux/fastrpc.h b/include/uapi/linux/fastrpc.h
>> index 8fec66601337..6b596fc7ddf3 100644
>> --- a/include/uapi/linux/fastrpc.h
>> +++ b/include/uapi/linux/fastrpc.h
>> @@ -6,6 +6,12 @@
>> #include <linux/types.h>
>>
>> #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_ioctl_invoke)
>> +#define FASTRPC_IOCTL_INIT _IOWR('R', 4, struct fastrpc_ioctl_init)
>> +
>> +/* INIT a new process or attach to guestos */
>> +#define FASTRPC_INIT_ATTACH 0
>> +#define FASTRPC_INIT_CREATE 1
>> +#define FASTRPC_INIT_CREATE_STATIC 2
>
> Maybe use three command codes here, and remove the 'flags' member?
>
Make sense, will do it in next version.

>> @@ -53,4 +59,16 @@ struct fastrpc_ioctl_invoke {
>> unsigned int *crc;
>> };
>>
>> +struct fastrpc_ioctl_init {
>> + uint32_t flags; /* one of FASTRPC_INIT_* macros */
>> + uintptr_t file; /* pointer to elf file */
>> + uint32_t filelen; /* elf file length */
>> + int32_t filefd; /* ION fd for the file */
>
> What does this have to do with ION? The driver seems to otherwise
> just use the generic dma_buf interfaces.
Yes, the driver just uses dma_buf, it looks like leftover from downstream!

>
>> + uintptr_t mem; /* mem for the PD */
>> + uint32_t memlen; /* mem length */
>> + int32_t memfd; /* fd for the mem */
>> + int attrs;
>> + unsigned int siglen;
>> +};
>
> This structure is again not suitable for ioctls. Please used fixed-length
> members and no holes in the structure.
Sure, Will recheck all the structures before sending next version!


--srini
>
> Arnd
>

2018-11-30 13:42:15

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method

On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
<[email protected]> wrote:
>
> This patch adds support to compute context invoke method
> on the remote processor (DSP).
> This involves setting up the functions input and output arguments,
> input and output handles and mapping the dmabuf fd for the
> argument/handle buffers.
>
> Most of the work is derived from various downstream Qualcomm kernels.
> Credits to various Qualcomm authors who have contributed to this code.
> Specially Tharun Kumar Merugu <[email protected]>
>
> Signed-off-by: Srinivas Kandagatla <[email protected]>

> +
> + INIT_LIST_HEAD(&ctx->node);
> + ctx->fl = user;
> + ctx->maps = (struct fastrpc_map **)(&ctx[1]);
> + ctx->lpra = (remote_arg_t *)(&ctx->maps[bufs]);
> + ctx->fds = (int *)(&ctx->lpra[bufs]);
> + ctx->attrs = (unsigned int *)(&ctx->fds[bufs]);
> +
> + if (!kernel) {
> + if (copy_from_user(ctx->lpra,
> + (void const __user *)inv->pra,
> + bufs * sizeof(*ctx->lpra))) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + if (inv->fds) {
> + if (copy_from_user(ctx->fds,
> + (void const __user *)inv->fds,
> + bufs * sizeof(*ctx->fds))) {
> + err = -EFAULT;
> + goto err;
> + }
> + }
> + if (inv->attrs) {
> + if (copy_from_user(
> + ctx->attrs,
> + (void const __user *)inv->attrs,
> + bufs * sizeof(*ctx->attrs))) {
> + err = -EFAULT;
> + goto err;
> + }
> + }
> + } else {
> + memcpy(ctx->lpra, inv->pra, bufs * sizeof(*ctx->lpra));
> + if (inv->fds)
> + memcpy(ctx->fds, inv->fds,
> + bufs * sizeof(*ctx->fds));
> + if (inv->attrs)
> + memcpy(ctx->attrs, inv->attrs,
> + bufs * sizeof(*ctx->attrs));
> + }

I'd split this function into multiple pieces: the internal one that
just takes kernel pointers, and a wrapper for the ioctl
that copies the user space data into the kernel before calling
the second one.

> +static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
> + uint32_t kernel, remote_arg_t *upra)
> +{
> + remote_arg64_t *rpra = ctx->rpra;
> + int i, inbufs, outbufs, handles;
> + struct fastrpc_invoke_buf *list;
> + struct fastrpc_phy_page *pages;
> + struct fastrpc_map *mmap;
> + uint32_t sc = ctx->sc;
> + uint64_t *fdlist;
> + uint32_t *crclist;
> + int err = 0;
> +
> + inbufs = REMOTE_SCALARS_INBUFS(sc);
> + outbufs = REMOTE_SCALARS_OUTBUFS(sc);
> + handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
> + list = fastrpc_invoke_buf_start(ctx->rpra, sc);
> + pages = fastrpc_phy_page_start(sc, list);
> + fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
> + crclist = (uint32_t *)(fdlist + FASTRPC_MAX_FDLIST);
> +
> + for (i = inbufs; i < inbufs + outbufs; ++i) {
> + if (!ctx->maps[i]) {
> + if (!kernel)
> + err =
> + copy_to_user((void __user *)ctx->lpra[i].buf.pv,
> + (void *)rpra[i].buf.pv, rpra[i].buf.len);
> + else
> + memcpy(ctx->lpra[i].buf.pv,
> + (void *)rpra[i].buf.pv, rpra[i].buf.len);
> +
> + if (err)
> + goto bail;
> + } else {
> + fastrpc_map_put(ctx->maps[i]);
> + ctx->maps[i] = NULL;
> + }
> + }

Same here.

> +static int fastrpc_internal_invoke(struct fastrpc_user *fl,
> + uint32_t kernel,
> + struct fastrpc_ioctl_invoke *inv)
> +{
> + struct fastrpc_invoke_ctx *ctx = NULL;
> + int err = 0;
> +
> + if (!fl->sctx)
> + return -EINVAL;
> +
> + ctx = fastrpc_context_alloc(fl, kernel, inv);
> + if (IS_ERR(ctx))
> + return PTR_ERR(ctx);
> +
> + if (REMOTE_SCALARS_LENGTH(ctx->sc)) {
> + err = fastrpc_get_args(kernel, ctx);
> + if (err)
> + goto bail;
> + }
> +
> + err = fastrpc_invoke_send(fl->sctx, ctx, kernel, inv->handle);
> + if (err)
> + goto bail;
> +
> + err = wait_for_completion_interruptible(&ctx->work);
> + if (err)
> + goto bail;

Can you add comments here to explain the control flow?
What exactly are we waiting for here? Does the completion
indicate that the remote side is done executing the code
and ready to do something else?

> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
> + struct fastrpc_channel_ctx *cctx = fl->cctx;
> + char __user *argp = (char __user *)arg;
> + int err;
> +
> + if (!fl->sctx) {
> + fl->sctx = fastrpc_session_alloc(cctx, 0);
> + if (!fl->sctx)
> + return -ENOENT;
> + }

Shouldn't that session be allocated during open()?

> +static void fastrpc_notify_users(struct fastrpc_user *user)
> +{
> + struct fastrpc_invoke_ctx *ctx, *n;
> +
> + spin_lock(&user->lock);
> + list_for_each_entry_safe(ctx, n, &user->pending, node)
> + complete(&ctx->work);
> + spin_unlock(&user->lock);
> +}

Can you explain here what it means to have multiple 'users' for
a 'fastrpc_user' structure? Why are they all done at the same time?

> +struct remote_dma_handle64 {
> + int fd;
> + uint32_t offset;
> + uint32_t len;
> +};

Maybe always make the offset/len fields and others 64 bit?

> +union remote_arg64 {
> + struct remote_buf64 buf;
> + struct remote_dma_handle64 dma;
> + uint32_t h;
> +};
> +
> +#define remote_arg_t union remote_arg
> +
> +struct remote_buf {
> + void *pv; /* buffer pointer */
> + size_t len; /* length of buffer */
> +};
> +
> +struct remote_dma_handle {
> + int fd;
> + uint32_t offset;
> +};
> +
> +union remote_arg {
> + struct remote_buf buf; /* buffer info */
> + struct remote_dma_handle dma;
> + uint32_t h; /* remote handle */
> +};

Try to avoid the padding at the end of the structure,
if you can't, then add a __reserved member.

I'd also recommend avoiding nested structures and
unions. Add more commands if necessary.

> +struct fastrpc_ioctl_invoke {
> + uint32_t handle; /* remote handle */
> + uint32_t sc; /* scalars describing the data */
> + remote_arg_t *pra; /* remote arguments list */
> + int *fds; /* fd list */
> + unsigned int *attrs; /* attribute list */
> + unsigned int *crc;
> +};

This seems too complex for an ioctl argument, with
multiple levels of pointer indirections. I'd normally
try to make each ioctl argument either a scalar, or a
structure with only fixed-length members.

The way we did this in spufs was to set up a context
first with all the information it needed, and make the
actual context switch from host CPU to remote a very
simple operation that took as few arguments as possible,
in case of spu_run() only the instruction pointer and
the location of the return status.

Arnd

2018-11-30 13:47:49

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 6/6] char: fastrpc: Add support for compat ioctls

On Fri, Nov 30, 2018 at 2:20 PM Thierry Escande
<[email protected]> wrote:
> On 30/11/2018 13:58, Arnd Bergmann wrote:
> > On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> > <[email protected]> wrote:
> >>
> >> From: Thierry Escande <[email protected]>
> >>
> >> This patch adds support for compat ioctl from 32 bits userland to
> >> Qualcomm fastrpc driver.
> >>
> >> Supported ioctls in this change are INIT, INVOKE, and ALLOC/FREE_DMA.
> >>
> >> Most of the work is derived from various downstream Qualcomm kernels.
> >> Credits to various Qualcomm authors who have contributed to this code.
> >> Specially Tharun Kumar Merugu <[email protected]>
> >>
> >> Signed-off-by: Thierry Escande <[email protected]>
> >> Signed-off-by: Srinivas Kandagatla <[email protected]>
> >
> > Can't you just define the native ioctls so that you don't need this.
>
> There are long time defined structures that are passed as argument to
> these ioctls and their sizes vary between 32 and 64 bits userlands, so
> the ioctl command values.

Where? I don't see them in linux-4.19.

> Unless I'm missing something here this also has the advantage not to be
> compiled if CONFIG_COMPAT is not set.

You can normally just set .compat_ioctl() to the same function as
.unlocked_ioctl(), and get no overhead either way.

Arnd

2018-11-30 13:52:46

by Thierry Escande

[permalink] [raw]
Subject: Re: [RFC PATCH 6/6] char: fastrpc: Add support for compat ioctls

On 30/11/2018 14:46, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 2:20 PM Thierry Escande
> <[email protected]> wrote:
>> On 30/11/2018 13:58, Arnd Bergmann wrote:
>>> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
>>> <[email protected]> wrote:
>>>>
>>>> From: Thierry Escande <[email protected]>
>>>>
>>>> This patch adds support for compat ioctl from 32 bits userland to
>>>> Qualcomm fastrpc driver.
>>>>
>>>> Supported ioctls in this change are INIT, INVOKE, and ALLOC/FREE_DMA.
>>>>
>>>> Most of the work is derived from various downstream Qualcomm kernels.
>>>> Credits to various Qualcomm authors who have contributed to this code.
>>>> Specially Tharun Kumar Merugu <[email protected]>
>>>>
>>>> Signed-off-by: Thierry Escande <[email protected]>
>>>> Signed-off-by: Srinivas Kandagatla <[email protected]>
>>>
>>> Can't you just define the native ioctls so that you don't need this.
>>
>> There are long time defined structures that are passed as argument to
>> these ioctls and their sizes vary between 32 and 64 bits userlands, so
>> the ioctl command values.
>
> Where? I don't see them in linux-4.19.
>
>> Unless I'm missing something here this also has the advantage not to be
>> compiled if CONFIG_COMPAT is not set.
>
> You can normally just set .compat_ioctl() to the same function as
> .unlocked_ioctl(), and get no overhead either way.

As you suggested for the other patches in this series, we will rework
these ioctl data structures and then use the same function.

Regards,
Thierry

2018-11-30 15:05:40

by Srinivas Kandagatla

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method

Thanks Arnd for the review comments!

On 30/11/18 13:41, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> <[email protected]> wrote:
>>
>> This patch adds support to compute context invoke method
>> on the remote processor (DSP).
>> This involves setting up the functions input and output arguments,
>> input and output handles and mapping the dmabuf fd for the
>> argument/handle buffers.
>>
>> Most of the work is derived from various downstream Qualcomm kernels.
>> Credits to various Qualcomm authors who have contributed to this code.
>> Specially Tharun Kumar Merugu <[email protected]>
>>
>> Signed-off-by: Srinivas Kandagatla <[email protected]>
>
>> +
>> + INIT_LIST_HEAD(&ctx->node);
>> + ctx->fl = user;
>> + ctx->maps = (struct fastrpc_map **)(&ctx[1]);
>> + ctx->lpra = (remote_arg_t *)(&ctx->maps[bufs]);
>> + ctx->fds = (int *)(&ctx->lpra[bufs]);
>> + ctx->attrs = (unsigned int *)(&ctx->fds[bufs]);
>> +
>> + if (!kernel) {
>> + if (copy_from_user(ctx->lpra,
>> + (void const __user *)inv->pra,
>> + bufs * sizeof(*ctx->lpra))) {
>> + err = -EFAULT;
>> + goto err;
>> + }
>> +
>> + if (inv->fds) {
>> + if (copy_from_user(ctx->fds,
>> + (void const __user *)inv->fds,
>> + bufs * sizeof(*ctx->fds))) {
>> + err = -EFAULT;
>> + goto err;
>> + }
>> + }
>> + if (inv->attrs) {
>> + if (copy_from_user(
>> + ctx->attrs,
>> + (void const __user *)inv->attrs,
>> + bufs * sizeof(*ctx->attrs))) {
>> + err = -EFAULT;
>> + goto err;
>> + }
>> + }
>> + } else {
>> + memcpy(ctx->lpra, inv->pra, bufs * sizeof(*ctx->lpra));
>> + if (inv->fds)
>> + memcpy(ctx->fds, inv->fds,
>> + bufs * sizeof(*ctx->fds));
>> + if (inv->attrs)
>> + memcpy(ctx->attrs, inv->attrs,
>> + bufs * sizeof(*ctx->attrs));
>> + }
>
> I'd split this function into multiple pieces: the internal one that
> just takes kernel pointers, and a wrapper for the ioctl
> that copies the user space data into the kernel before calling
> the second one.

Sure, will be done in next version!
>
>> +static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
>> + uint32_t kernel, remote_arg_t *upra)
>> +{
>> + remote_arg64_t *rpra = ctx->rpra;
>> + int i, inbufs, outbufs, handles;
>> + struct fastrpc_invoke_buf *list;
>> + struct fastrpc_phy_page *pages;
>> + struct fastrpc_map *mmap;
>> + uint32_t sc = ctx->sc;
>> + uint64_t *fdlist;
>> + uint32_t *crclist;
>> + int err = 0;
>> +
>> + inbufs = REMOTE_SCALARS_INBUFS(sc);
>> + outbufs = REMOTE_SCALARS_OUTBUFS(sc);
>> + handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
>> + list = fastrpc_invoke_buf_start(ctx->rpra, sc);
>> + pages = fastrpc_phy_page_start(sc, list);
>> + fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
>> + crclist = (uint32_t *)(fdlist + FASTRPC_MAX_FDLIST);
>> +
>> + for (i = inbufs; i < inbufs + outbufs; ++i) {
>> + if (!ctx->maps[i]) {
>> + if (!kernel)
>> + err =
>> + copy_to_user((void __user *)ctx->lpra[i].buf.pv,
>> + (void *)rpra[i].buf.pv, rpra[i].buf.len);
>> + else
>> + memcpy(ctx->lpra[i].buf.pv,
>> + (void *)rpra[i].buf.pv, rpra[i].buf.len);
>> +
>> + if (err)
>> + goto bail;
>> + } else {
>> + fastrpc_map_put(ctx->maps[i]);
>> + ctx->maps[i] = NULL;
>> + }
>> + }
>
> Same here.
>
>> +static int fastrpc_internal_invoke(struct fastrpc_user *fl,
>> + uint32_t kernel,
>> + struct fastrpc_ioctl_invoke *inv)
>> +{
>> + struct fastrpc_invoke_ctx *ctx = NULL;
>> + int err = 0;
>> +
>> + if (!fl->sctx)
>> + return -EINVAL;
>> +
>> + ctx = fastrpc_context_alloc(fl, kernel, inv);
>> + if (IS_ERR(ctx))
>> + return PTR_ERR(ctx);
>> +
>> + if (REMOTE_SCALARS_LENGTH(ctx->sc)) {
>> + err = fastrpc_get_args(kernel, ctx);
>> + if (err)
>> + goto bail;
>> + }
>> +
>> + err = fastrpc_invoke_send(fl->sctx, ctx, kernel, inv->handle);
>> + if (err)
>> + goto bail;
>> +
>> + err = wait_for_completion_interruptible(&ctx->work);
>> + if (err)
>> + goto bail;
>
> Can you add comments here to explain the control flow?
> What exactly are we waiting for here? Does the completion
> indicate that the remote side is done executing the code
> and ready to do something else?

Sure I will add some detailed comment here, completion here means that
the remote side has finished with the execution of that particular context.

>
>> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
>> + unsigned long arg)
>> +{
>> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
>> + struct fastrpc_channel_ctx *cctx = fl->cctx;
>> + char __user *argp = (char __user *)arg;
>> + int err;
>> +
>> + if (!fl->sctx) {
>> + fl->sctx = fastrpc_session_alloc(cctx, 0);
>> + if (!fl->sctx)
>> + return -ENOENT;
>> + }
>
> Shouldn't that session be allocated during open()?
>
Yes and no: we do not need a context in all cases, for example when we
just want to allocate a dmabuf.

>> +static void fastrpc_notify_users(struct fastrpc_user *user)
>> +{
>> + struct fastrpc_invoke_ctx *ctx, *n;
>> +
>> + spin_lock(&user->lock);
>> + list_for_each_entry_safe(ctx, n, &user->pending, node)
>> + complete(&ctx->work);
>> + spin_unlock(&user->lock);
>> +}
>
> Can you explain here what it means to have multiple 'users'
> a 'fastrpc_user' structure? Why are they all done at the same time?
>
This is the case where users need to be notified if the dsp goes down
due to crash or shut down!

>> +struct remote_dma_handle64 {
>> + int fd;
>> + uint32_t offset;
>> + uint32_t len;
>> +};
>
> Maybe always make the offset/len fields and others 64 bit?
>
yes, I will do that.
>> +union remote_arg64 {
>> + struct remote_buf64 buf;
>> + struct remote_dma_handle64 dma;
>> + uint32_t h;
>> +};
>> +
>> +#define remote_arg_t union remote_arg
>> +
>> +struct remote_buf {
>> + void *pv; /* buffer pointer */
>> + size_t len; /* length of buffer */
>> +};
>> +
>> +struct remote_dma_handle {
>> + int fd;
>> + uint32_t offset;
>> +};
>> +
>> +union remote_arg {
>> + struct remote_buf buf; /* buffer info */
>> + struct remote_dma_handle dma;
>> + uint32_t h; /* remote handle */
>> +};
>
> Try to avoid the padding at the end of the structure,
> if you can't, then add a __reserved member.
>
> I'd also recommend avoiding nested structures and
> unions. Add more commands if necessary.
I will revisit all the data structures and make sure we do not leave any
holes in the structure!
>
>> +struct fastrpc_ioctl_invoke {
>> + uint32_t handle; /* remote handle */
>> + uint32_t sc; /* scalars describing the data */
>> + remote_arg_t *pra; /* remote arguments list */
>> + int *fds; /* fd list */
>> + unsigned int *attrs; /* attribute list */
>> + unsigned int *crc;
>> +};
>
> This seems too complex for an ioctl argument, with
> multiple levels of pointer indirections. I'd normally
> try to make each ioctl argument either a scalar, or a
> structure with only fixed-length members.
>
I totally agree with you, and many thanks for your expert input.
Maybe something like the below, with fixed-length members, would work?

struct fastrpc_remote_arg {
__u64 ptr; /* buffer ptr */
__u64 len; /* length */
__u32 fd; /* dmabuf fd */
__u32 reserved1
__u64 reserved2
};

struct fastrpc_remote_fd {
__u64 fd;
__u64 reserved1
__u64 reserved2
__u64 reserved3
};

struct fastrpc_remote_attr {
__u64 attr;
__u64 reserved1
__u64 reserved2
__u64 reserved3
};

struct fastrpc_remote_crc {
__u64 crc;
__u64 reserved1
__u64 reserved2
__u64 reserved3
};

struct fastrpc_ioctl_invoke {
__u32 handle;
__u32 sc;
/* The minimum size is scalar_length * 32 */
struct fastrpc_remote_args *rargs;
struct fastrpc_remote_fd *fds;
struct fastrpc_remote_attr *attrs;
struct fastrpc_remote_crc *crc;
};

> The way we did this in spufs was to set up a context
> first with all the information it needed, and make the
> actual context switch from host CPU to remote a very
> simple operation that took as few arguments as possible,
> in case of spu_run() only the instruction pointer and
> the location of the return status.

thanks,
srini
>
> Arnd
>

2018-11-30 15:11:30

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method

On Fri, Nov 30, 2018 at 4:01 PM Srinivas Kandagatla
<[email protected]> wrote:
> Thanks Arnd for the review comments!
> On 30/11/18 13:41, Arnd Bergmann wrote:
> > On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> > <[email protected]> wrote:

> >> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
> >> + unsigned long arg)
> >> +{
> >> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
> >> + struct fastrpc_channel_ctx *cctx = fl->cctx;
> >> + char __user *argp = (char __user *)arg;
> >> + int err;
> >> +
> >> + if (!fl->sctx) {
> >> + fl->sctx = fastrpc_session_alloc(cctx, 0);
> >> + if (!fl->sctx)
> >> + return -ENOENT;
> >> + }
> >
> > Shouldn't that session be allocated during open()?
> >
> Yes, and no, we do not need context in all the cases. In cases like we
> just want to allocate dmabuf.

Can you give an example what that would be good for?

>
> >> +static void fastrpc_notify_users(struct fastrpc_user *user)
> >> +{
> >> + struct fastrpc_invoke_ctx *ctx, *n;
> >> +
> >> + spin_lock(&user->lock);
> >> + list_for_each_entry_safe(ctx, n, &user->pending, node)
> >> + complete(&ctx->work);
> >> + spin_unlock(&user->lock);
> >> +}
> >
> > Can you explain here what it means to have multiple 'users'
> > a 'fastrpc_user' structure? Why are they all done at the same time?
> >
> This is the case where users need to be notified if the dsp goes down
> due to crash or shut down!

What is a 'user' then? My point is that it seems to refer to two
different things here. I assume 'fastrpc_user' is whoever
has opened the file descriptor.

> >
> >> +struct fastrpc_ioctl_invoke {
> >> + uint32_t handle; /* remote handle */
> >> + uint32_t sc; /* scalars describing the data */
> >> + remote_arg_t *pra; /* remote arguments list */
> >> + int *fds; /* fd list */
> >> + unsigned int *attrs; /* attribute list */
> >> + unsigned int *crc;
> >> +};
> >
> > This seems too complex for an ioctl argument, with
> > multiple levels of pointer indirections. I'd normally
> > try to make each ioctl argument either a scalar, or a
> > structure with only fixed-length members.
> >
> I totally agree with you and many thanks for your expert inputs,
> May be something like below with fixed length members would work?
>
> struct fastrpc_remote_arg {
> __u64 ptr; /* buffer ptr */
> __u64 len; /* length */
> __u32 fd; /* dmabuf fd */
> __u32 reserved1
> __u64 reserved2
> };
>
> struct fastrpc_remote_fd {
> __u64 fd;
> __u64 reserved1
> __u64 reserved2
> __u64 reserved3
> };
>
> struct fastrpc_remote_attr {
> __u64 attr;
> __u64 reserved1
> __u64 reserved2
> __u64 reserved3
> };
>
> struct fastrpc_remote_crc {
> __u64 crc;
> __u64 reserved1
> __u64 reserved2
> __u64 reserved3
> };

I don't see a need to add extra reserved fields for structures
that are already naturally aligned here, e.g. in
fastrpc_remote_arg we need the 'reserved1' but not
the 'reserved2'.

>
> struct fastrpc_ioctl_invoke {
> __u32 handle;
> __u32 sc;
> /* The minimum size is scalar_length * 32 */
> struct fastrpc_remote_args *rargs;
> struct fastrpc_remote_fd *fds;
> struct fastrpc_remote_attr *attrs;
> struct fastrpc_remote_crc *crc;
> };

Do these really have to be indirect then? Are they all
lists of variable length? How do you know how long?

Arnd

2018-11-30 16:05:07

by Srinivas Kandagatla

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method



On 30/11/18 15:08, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 4:01 PM Srinivas Kandagatla
> <[email protected]> wrote:
>> Thanks Arnd for the review comments!
>> On 30/11/18 13:41, Arnd Bergmann wrote:
>>> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
>>> <[email protected]> wrote:
>
>>>> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
>>>> + unsigned long arg)
>>>> +{
>>>> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
>>>> + struct fastrpc_channel_ctx *cctx = fl->cctx;
>>>> + char __user *argp = (char __user *)arg;
>>>> + int err;
>>>> +
>>>> + if (!fl->sctx) {
>>>> + fl->sctx = fastrpc_session_alloc(cctx, 0);
>>>> + if (!fl->sctx)
>>>> + return -ENOENT;
>>>> + }
>>>
>>> Shouldn't that session be allocated during open()?
>>>
>> Yes, and no, we do not need context in all the cases. In cases like we
>> just want to allocate dmabuf.
>
> Can you give an example what that would be good for?
>

Currently the instance which does not need a session is used as a simple
memory allocator (rpcmem). TBH, this is a side effect of trying to fit
in with the downstream application infrastructure, which uses ION for Android
use cases.

>>
>>>> +static void fastrpc_notify_users(struct fastrpc_user *user)
>>>> +{
>>>> + struct fastrpc_invoke_ctx *ctx, *n;
>>>> +
>>>> + spin_lock(&user->lock);
>>>> + list_for_each_entry_safe(ctx, n, &user->pending, node)
>>>> + complete(&ctx->work);
>>>> + spin_unlock(&user->lock);
>>>> +}
>>>
>>> Can you explain here what it means to have multiple 'users'
>>> a 'fastrpc_user' structure? Why are they all done at the same time?

user is allocated on every open(). Having multiple users means that
there are more than one compute sessions running on a given dsp.

The reason why all the users are notified here is that the dsp is
going down, so all the compute sessions associated with it will not see
any response from the dsp; any pending/waiting compute contexts are therefore
explicitly notified.

>>>
>> This is the case where users need to be notified if the dsp goes down
>> due to crash or shut down!
>
> What is a 'user' then? My point is that it seems to refer to two
> different things here. I assume 'fastrpc_user' is whoever
> has opened the file descriptor.
>
>>>
>>>> +struct fastrpc_ioctl_invoke {
>>>> + uint32_t handle; /* remote handle */
>>>> + uint32_t sc; /* scalars describing the data */
>>>> + remote_arg_t *pra; /* remote arguments list */
>>>> + int *fds; /* fd list */
>>>> + unsigned int *attrs; /* attribute list */
>>>> + unsigned int *crc;
>>>> +};
>>>
>>> This seems too complex for an ioctl argument, with
>>> multiple levels of pointer indirections. I'd normally
>>> try to make each ioctl argument either a scalar, or a
>>> structure with only fixed-length members.
>>>
>> I totally agree with you and many thanks for your expert inputs,
>> May be something like below with fixed length members would work?
>>
>> struct fastrpc_remote_arg {
>> __u64 ptr; /* buffer ptr */
>> __u64 len; /* length */
>> __u32 fd; /* dmabuf fd */
>> __u32 reserved1
>> __u64 reserved2
>> };
>>
>> struct fastrpc_remote_fd {
>> __u64 fd;
>> __u64 reserved1
>> __u64 reserved2
>> __u64 reserved3
>> };
>>
>> struct fastrpc_remote_attr {
>> __u64 attr;
>> __u64 reserved1
>> __u64 reserved2
>> __u64 reserved3
>> };
>>
>> struct fastrpc_remote_crc {
>> __u64 crc;
>> __u64 reserved1
>> __u64 reserved2
>> __u64 reserved3
>> };
>
> I don't see a need to add extra served fields for structures
> that are already naturally aligned here, e.g. in
> fastrpc_remote_arg we need the 'reserved1' but not
> the 'reserved2'.
Yes, I see, I overdid it!
Another idea: maybe I can try to combine these into a single structure,
something like:

struct fastrpc_invoke_arg {
__u64 ptr;
__u64 len;
__u32 fd;
__u32 reserved1
__u64 attr;
__u64 crc;
};

struct fastrpc_ioctl_invoke {
__u32 handle;
__u32 sc;
/* The minimum size is scalar_length * 32*/
struct fastrpc_invoke_args *args;
};

>
>>
>> struct fastrpc_ioctl_invoke {
>> __u32 handle;
>> __u32 sc;
>> /* The minimum size is scalar_length * 32 */
>> struct fastrpc_remote_args *rargs;
>> struct fastrpc_remote_fd *fds;
>> struct fastrpc_remote_attr *attrs;
>> struct fastrpc_remote_crc *crc;
>> };
>
> Do these really have to be indirect then? Are they all
> lists of variable length? How do you know how long?
Yes, they are variable length, and each one will be 'scalar length' entries long.
The scalar length is derived from the 'sc' field in this structure.

--srini


>
> Arnd
>

2018-11-30 16:15:02

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [RFC PATCH 2/6] char: fastrpc: Add Qualcomm fastrpc basic driver model

On Fri, Nov 30, 2018 at 10:46:53AM +0000, Srinivas Kandagatla wrote:
> This patch adds basic driver model for qualcomm fastrpc.
> Each DSP rpmsg channel is represented as fastrpc channel context and
> is exposed as a character driver for userspace interface.
> Each compute context bank is represented as fastrpc-session-context,
> which are dynamically managed by the channel context char device.
>
> Signed-off-by: Srinivas Kandagatla <[email protected]>
> ---
> drivers/char/Kconfig | 10 ++
> drivers/char/Makefile | 1 +
> drivers/char/fastrpc.c | 337 +++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 348 insertions(+)
> create mode 100644 drivers/char/fastrpc.c
>
> diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
> index 9d03b2ff5df6..75fd274c67df 100644
> --- a/drivers/char/Kconfig
> +++ b/drivers/char/Kconfig
> @@ -552,6 +552,16 @@ config ADI
> and SSM (Silicon Secured Memory). Intended consumers of this
> driver include crash and makedumpfile.
>
> +config QCOM_FASTRPC
> + tristate "Qualcomm FastRPC"
> + depends on ARCH_QCOM || COMPILE_TEST
> + depends on RPMSG
> + help
> + Provides a communication mechanism that allows for clients to
> + make remote method invocations across processor boundary to
> + applications DSP processor. Say M if you want to enable this
> + module.
> +
> endmenu
>
> config RANDOM_TRUST_CPU
> diff --git a/drivers/char/Makefile b/drivers/char/Makefile
> index b8d42b4e979b..30ec9187350e 100644
> --- a/drivers/char/Makefile
> +++ b/drivers/char/Makefile
> @@ -58,3 +58,4 @@ js-rtc-y = rtc.o
> obj-$(CONFIG_XILLYBUS) += xillybus/
> obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o
> obj-$(CONFIG_ADI) += adi.o
> +obj-$(CONFIG_QCOM_FASTRPC) += fastrpc.o
> diff --git a/drivers/char/fastrpc.c b/drivers/char/fastrpc.c
> new file mode 100644
> index 000000000000..97d8062eb3e1
> --- /dev/null
> +++ b/drivers/char/fastrpc.c
> @@ -0,0 +1,337 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
> +// Copyright (c) 2018, Linaro Limited
> +
> +#include <linux/cdev.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/idr.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/rpmsg.h>
> +#include <linux/scatterlist.h>
> +#include <linux/slab.h>
> +
> +#define ADSP_DOMAIN_ID (0)
> +#define MDSP_DOMAIN_ID (1)
> +#define SDSP_DOMAIN_ID (2)
> +#define CDSP_DOMAIN_ID (3)
> +#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/
> +#define FASTRPC_MAX_SESSIONS 9 /*8 compute, 1 cpz*/
> +#define FASTRPC_CTX_MAX (256)
> +#define FASTRPC_CTXID_MASK (0xFF0)
> +#define FASTRPC_DEVICE_NAME "fastrpc"
> +
> +#define cdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, cdev)
> +
> +static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
> + "sdsp", "cdsp"};
> +static dev_t fastrpc_major;

Why do you need a whole major number for this? Why not just use the
misc interface instead?


2018-11-30 16:20:34

by Srinivas Kandagatla

[permalink] [raw]
Subject: Re: [RFC PATCH 2/6] char: fastrpc: Add Qualcomm fastrpc basic driver model

Thanks for the comments!

On 30/11/18 16:13, Greg KH wrote:
>> + "sdsp", "cdsp"};
>> +static dev_t fastrpc_major;
> Why do you need a whole major number for this? Why not just use the
Not really!
> misc interface instead?

Sure, I will give that a go!
>

--srini

2018-11-30 16:21:38

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method

On Fri, Nov 30, 2018 at 5:03 PM Srinivas Kandagatla
<[email protected]> wrote:
> On 30/11/18 15:08, Arnd Bergmann wrote:
> > On Fri, Nov 30, 2018 at 4:01 PM Srinivas Kandagatla
> > <[email protected]> wrote:
> >> Thanks Arnd for the review comments!
> >> On 30/11/18 13:41, Arnd Bergmann wrote:
> >>> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
> >>> <[email protected]> wrote:
> >
> >>>> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
> >>>> + unsigned long arg)
> >>>> +{
> >>>> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
> >>>> + struct fastrpc_channel_ctx *cctx = fl->cctx;
> >>>> + char __user *argp = (char __user *)arg;
> >>>> + int err;
> >>>> +
> >>>> + if (!fl->sctx) {
> >>>> + fl->sctx = fastrpc_session_alloc(cctx, 0);
> >>>> + if (!fl->sctx)
> >>>> + return -ENOENT;
> >>>> + }
> >>>
> >>> Shouldn't that session be allocated during open()?
> >>>
> >> Yes, and no, we do not need context in all the cases. In cases like we
> >> just want to allocate dmabuf.
> >
> > Can you give an example what that would be good for?
> >
>
> Currently the instance which does not need session is used as simple
> memory allocator (rpcmem), TBH, this is the side effect of trying to fit
> in with downstream application infrastructure which uses ion for andriod
> usecases.

That does not sound like enough of a reason then, user space is
easy to change here to just allocate the memory from the device itself.
The only reason that I can see for needing a dmabuf would be if
you have to share a buffer between two instances, and then you
can use either of them.

> >>>> +static void fastrpc_notify_users(struct fastrpc_user *user)
> >>>> +{
> >>>> + struct fastrpc_invoke_ctx *ctx, *n;
> >>>> +
> >>>> + spin_lock(&user->lock);
> >>>> + list_for_each_entry_safe(ctx, n, &user->pending, node)
> >>>> + complete(&ctx->work);
> >>>> + spin_unlock(&user->lock);
> >>>> +}
> >>>
> >>> Can you explain here what it means to have multiple 'users'
> >>> a 'fastrpc_user' structure? Why are they all done at the same time?
>
> user is allocated on every open(). Having multiple users means that
> there are more than one compute sessions running on a given dsp.
>
> They reason why all the users are notified here is because the dsp is
> going down, so all the compute sessions associated with it will not see
> any response from dsp, so any pending/waiting compute contexts are
> explicitly notified.

I don't get it yet. What are 'compute sessions'? Do you have
multiple threads running on a single instance at the same time?
I would have expected to only ever see one thread in the
'wait_for_completion()' above, and others possibly waiting
for a chance to get to but not already running.

> >> struct fastrpc_remote_crc {
> >> __u64 crc;
> >> __u64 reserved1
> >> __u64 reserved2
> >> __u64 reserved3
> >> };
> >
> > I don't see a need to add extra served fields for structures
> > that are already naturally aligned here, e.g. in
> > fastrpc_remote_arg we need the 'reserved1' but not
> > the 'reserved2'.
> Yes, I see, I overdone it!
> Other idea, is, may be I can try to combine these into single structure
> something like:
>
> struct fastrpc_invoke_arg {
> __u64 ptr;
> __u64 len;
> __u32 fd;
> __u32 reserved1
> __u64 attr;
> __u64 crc;
> };
>
> struct fastrpc_ioctl_invoke {
> __u32 handle;
> __u32 sc;
> /* The minimum size is scalar_length * 32*/
> struct fastrpc_invoke_args *args;
> };

That is still two structures, not one ;-)

> >> struct fastrpc_ioctl_invoke {
> >> __u32 handle;
> >> __u32 sc;
> >> /* The minimum size is scalar_length * 32 */
> >> struct fastrpc_remote_args *rargs;
> >> struct fastrpc_remote_fd *fds;
> >> struct fastrpc_remote_attr *attrs;
> >> struct fastrpc_remote_crc *crc;
> >> };
> >
> > Do these really have to be indirect then? Are they all
> > lists of variable length? How do you know how long?
> Yes, they are variable length and will be scalar length long.
> Scalar length is derived from sc variable in this structure.

Do you mean we have a variable number 'sc', but each array
always has the same length as the other ones? In that
case: yes, combining them seems sensible.

The other question this raises is: what is 'handle'?
Why is the file descriptor not enough to identify the
instance we want to talk to?

Arnd

2018-11-30 16:42:25

by Srinivas Kandagatla

[permalink] [raw]
Subject: Re: [RFC PATCH 3/6] char: fastrpc: Add support for context Invoke method



On 30/11/18 16:19, Arnd Bergmann wrote:
> On Fri, Nov 30, 2018 at 5:03 PM Srinivas Kandagatla
> <[email protected]> wrote:
>> On 30/11/18 15:08, Arnd Bergmann wrote:
>>> On Fri, Nov 30, 2018 at 4:01 PM Srinivas Kandagatla
>>> <[email protected]> wrote:
>>>> Thanks Arnd for the review comments!
>>>> On 30/11/18 13:41, Arnd Bergmann wrote:
>>>>> On Fri, Nov 30, 2018 at 11:48 AM Srinivas Kandagatla
>>>>> <[email protected]> wrote:
>>>
>>>>>> +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
>>>>>> + unsigned long arg)
>>>>>> +{
>>>>>> + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
>>>>>> + struct fastrpc_channel_ctx *cctx = fl->cctx;
>>>>>> + char __user *argp = (char __user *)arg;
>>>>>> + int err;
>>>>>> +
>>>>>> + if (!fl->sctx) {
>>>>>> + fl->sctx = fastrpc_session_alloc(cctx, 0);
>>>>>> + if (!fl->sctx)
>>>>>> + return -ENOENT;
>>>>>> + }
>>>>>
>>>>> Shouldn't that session be allocated during open()?
>>>>>
>>>> Yes, and no, we do not need context in all the cases. In cases like we
>>>> just want to allocate dmabuf.
>>>
>>> Can you give an example what that would be good for?
>>>
>>
>> Currently the instance which does not need session is used as simple
>> memory allocator (rpcmem), TBH, this is the side effect of trying to fit
>> in with downstream application infrastructure which uses ion for andriod
>> usecases.
>
> That does not sound like enough of a reason then, user space is
> easy to change here to just allocate the memory from the device itself.
> The only reason that I can see for needing a dmabuf would be if
> you have to share a buffer between two instances, and then you
> can use either of them.

I agree, I will try to rework this and remove the instances that do not
require sessions!

Sharing buffers is also a reason for dmabuf here.

>
>>>>>> +static void fastrpc_notify_users(struct fastrpc_user *user)
>>>>>> +{
>>>>>> + struct fastrpc_invoke_ctx *ctx, *n;
>>>>>> +
>>>>>> + spin_lock(&user->lock);
>>>>>> + list_for_each_entry_safe(ctx, n, &user->pending, node)
>>>>>> + complete(&ctx->work);
>>>>>> + spin_unlock(&user->lock);
>>>>>> +}
>>>>>
>>>>> Can you explain here what it means to have multiple 'users'
>>>>> a 'fastrpc_user' structure? Why are they all done at the same time?
>>
>> user is allocated on every open(). Having multiple users means that
>> there are more than one compute sessions running on a given dsp.
>>
>> They reason why all the users are notified here is because the dsp is
>> going down, so all the compute sessions associated with it will not see
>> any response from dsp, so any pending/waiting compute contexts are
>> explicitly notified.
>
> I don't get it yet. What are 'compute sessions'? Do you have
> multiple threads running on a single instance at the same time?

Compute sessions are "compute context-bank" instances in the DSP.

The DSP supports multiple context banks: ideally 12, 4 of which
are reserved for other purposes and 8 of which are used for compute, one
for each process. So ideally we can run 8 computes in parallel.


> I would have expected to only ever see one thread in the
> 'wait_for_completion()' above, and others possibly waiting
> for a chance to get to but not already running.
>
>>>> struct fastrpc_remote_crc {
>>>> __u64 crc;
>>>> __u64 reserved1
>>>> __u64 reserved2
>>>> __u64 reserved3
>>>> };
>>>
>>> I don't see a need to add extra served fields for structures
>>> that are already naturally aligned here, e.g. in
>>> fastrpc_remote_arg we need the 'reserved1' but not
>>> the 'reserved2'.
>> Yes, I see, I overdone it!
>> Other idea, is, may be I can try to combine these into single structure
>> something like:
>>
>> struct fastrpc_invoke_arg {
>> __u64 ptr;
>> __u64 len;
>> __u32 fd;
>> __u32 reserved1
>> __u64 attr;
>> __u64 crc;
>> };
>>
>> struct fastrpc_ioctl_invoke {
>> __u32 handle;
>> __u32 sc;
>> /* The minimum size is scalar_length * 32*/
>> struct fastrpc_invoke_args *args;
>> };
>
> That is still two structure, not one ;-)
>
>>>> struct fastrpc_ioctl_invoke {
>>>> __u32 handle;
>>>> __u32 sc;
>>>> /* The minimum size is scalar_length * 32 */
>>>> struct fastrpc_remote_args *rargs;
>>>> struct fastrpc_remote_fd *fds;
>>>> struct fastrpc_remote_attr *attrs;
>>>> struct fastrpc_remote_crc *crc;
>>>> };
>>>
>>> Do these really have to be indirect then? Are they all
>>> lists of variable length? How do you know how long?
>> Yes, they are variable length and will be scalar length long.
>> Scalar length is derived from sc variable in this structure.
>
> Do you mean we have a variable number 'sc', but each array
> always has the same length as the other ones? In that
> case: yes, combining them seems sensible.
Yes, that's what I meant!

>
> The other question this raises is: what is 'handle'?
> Why is the file descriptor not enough to identify the
> instance we want to talk to?
This is the remote handle to the opened interface on which this method has
to be invoked.
For example, if we are running a calculator application, the calculator will
have a unique handle on which the calculate() method needs to be invoked.


thanks,
srini
>
> Arnd
>