2021-09-17 21:04:12

by Alexandre Bailon

[permalink] [raw]
Subject: [RFC PATCH 3/4] rpmsg: Add support of AI Processor Unit (APU)

Some Mediatek SoC provides hardware accelerator for AI / ML.
This driver use the DRM driver to manage the shared memory,
and use rpmsg to execute jobs on the APU.

Signed-off-by: Alexandre Bailon <[email protected]>
---
drivers/rpmsg/Kconfig | 10 +++
drivers/rpmsg/Makefile | 1 +
drivers/rpmsg/apu_rpmsg.c | 184 ++++++++++++++++++++++++++++++++++++++
3 files changed, 195 insertions(+)
create mode 100644 drivers/rpmsg/apu_rpmsg.c

diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
index 0b4407abdf138..fc1668f795004 100644
--- a/drivers/rpmsg/Kconfig
+++ b/drivers/rpmsg/Kconfig
@@ -73,4 +73,14 @@ config RPMSG_VIRTIO
select RPMSG_NS
select VIRTIO

+config RPMSG_APU
+ tristate "APU RPMSG driver"
+ select REMOTEPROC
+ select RPMSG_VIRTIO
+ select DRM_APU
+ help
+ This provides a RPMSG driver that provides some facilities to
+ communicate with an accelerated processing unit (APU).
+ This Uses the APU DRM driver to manage memory and job scheduling.
+
endmenu
diff --git a/drivers/rpmsg/Makefile b/drivers/rpmsg/Makefile
index 8d452656f0ee3..8b336b9a817c1 100644
--- a/drivers/rpmsg/Makefile
+++ b/drivers/rpmsg/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_RPMSG_QCOM_GLINK_RPM) += qcom_glink_rpm.o
obj-$(CONFIG_RPMSG_QCOM_GLINK_SMEM) += qcom_glink_smem.o
obj-$(CONFIG_RPMSG_QCOM_SMD) += qcom_smd.o
obj-$(CONFIG_RPMSG_VIRTIO) += virtio_rpmsg_bus.o
+obj-$(CONFIG_RPMSG_APU) += apu_rpmsg.o
diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c
new file mode 100644
index 0000000000000..7e504bd176a4d
--- /dev/null
+++ b/drivers/rpmsg/apu_rpmsg.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright 2020 BayLibre SAS
+
+#include <asm/cacheflush.h>
+
+#include <linux/cdev.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-map-ops.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/remoteproc.h>
+#include <linux/rpmsg.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <drm/apu_drm.h>
+
+#include "rpmsg_internal.h"
+
+#define APU_RPMSG_SERVICE_MT8183 "rpmsg-mt8183-apu0"
+
+struct rpmsg_apu {
+ struct apu_core *core;
+ struct rpmsg_device *rpdev;
+};
+
+static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
+ void *priv, u32 addr)
+{
+ struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
+ struct apu_core *apu_core = apu->core;
+
+ return apu_drm_callback(apu_core, data, count);
+}
+
+static int apu_rpmsg_send(struct apu_core *apu_core, void *data, int len)
+{
+ struct rpmsg_apu *apu = apu_drm_priv(apu_core);
+ struct rpmsg_device *rpdev = apu->rpdev;
+
+ return rpmsg_send(rpdev->ept, data, len);
+}
+
+static struct apu_drm_ops apu_rpmsg_ops = {
+ .send = apu_rpmsg_send,
+};
+
+static int apu_init_iovad(struct rproc *rproc, struct rpmsg_apu *apu)
+{
+ struct resource_table *table;
+ struct fw_rsc_carveout *rsc;
+ int i;
+
+ if (!rproc->table_ptr) {
+ dev_err(&apu->rpdev->dev,
+ "No resource_table: has the firmware been loaded ?\n");
+ return -ENODEV;
+ }
+
+ table = rproc->table_ptr;
+ for (i = 0; i < table->num; i++) {
+ int offset = table->offset[i];
+ struct fw_rsc_hdr *hdr = (void *)table + offset;
+
+ if (hdr->type != RSC_CARVEOUT)
+ continue;
+
+ rsc = (void *)hdr + sizeof(*hdr);
+ if (apu_drm_reserve_iova(apu->core, rsc->da, rsc->len)) {
+ dev_err(&apu->rpdev->dev,
+ "failed to reserve iova\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static struct rproc *apu_get_rproc(struct rpmsg_device *rpdev)
+{
+ /*
+ * To work, the APU RPMsg driver need to get the rproc device.
+ * Currently, we only use virtio so we could use that to find the
+ * remoteproc parent.
+ */
+ if (!rpdev->dev.parent && rpdev->dev.parent->bus) {
+ dev_err(&rpdev->dev, "invalid rpmsg device\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (strcmp(rpdev->dev.parent->bus->name, "virtio")) {
+ dev_err(&rpdev->dev, "unsupported bus\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return vdev_to_rproc(dev_to_virtio(rpdev->dev.parent));
+}
+
+static int apu_rpmsg_probe(struct rpmsg_device *rpdev)
+{
+ struct rpmsg_apu *apu;
+ struct rproc *rproc;
+ int ret;
+
+ apu = devm_kzalloc(&rpdev->dev, sizeof(*apu), GFP_KERNEL);
+ if (!apu)
+ return -ENOMEM;
+ apu->rpdev = rpdev;
+
+ rproc = apu_get_rproc(rpdev);
+ if (IS_ERR_OR_NULL(rproc))
+ return PTR_ERR(rproc);
+
+ /* Make device dma capable by inheriting from parent's capabilities */
+ set_dma_ops(&rpdev->dev, get_dma_ops(rproc->dev.parent));
+
+ ret = dma_coerce_mask_and_coherent(&rpdev->dev,
+ dma_get_mask(rproc->dev.parent));
+ if (ret)
+ goto err_put_device;
+
+ rpdev->dev.iommu_group = rproc->dev.parent->iommu_group;
+
+ apu->core = apu_drm_register_core(rproc, &apu_rpmsg_ops, apu);
+ if (!apu->core) {
+ ret = -ENODEV;
+ goto err_put_device;
+ }
+
+ ret = apu_init_iovad(rproc, apu);
+
+ dev_set_drvdata(&rpdev->dev, apu);
+
+ return ret;
+
+err_put_device:
+ devm_kfree(&rpdev->dev, apu);
+
+ return ret;
+}
+
+static void apu_rpmsg_remove(struct rpmsg_device *rpdev)
+{
+ struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
+
+ apu_drm_unregister_core(apu);
+ devm_kfree(&rpdev->dev, apu);
+}
+
+static const struct rpmsg_device_id apu_rpmsg_match[] = {
+ { APU_RPMSG_SERVICE_MT8183 },
+ {}
+};
+
+static struct rpmsg_driver apu_rpmsg_driver = {
+ .probe = apu_rpmsg_probe,
+ .remove = apu_rpmsg_remove,
+ .callback = apu_rpmsg_callback,
+ .id_table = apu_rpmsg_match,
+ .drv = {
+ .name = "apu_rpmsg",
+ },
+};
+
+static int __init apu_rpmsg_init(void)
+{
+ return register_rpmsg_driver(&apu_rpmsg_driver);
+}
+arch_initcall(apu_rpmsg_init);
+
+static void __exit apu_rpmsg_exit(void)
+{
+ unregister_rpmsg_driver(&apu_rpmsg_driver);
+}
+module_exit(apu_rpmsg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("APU RPMSG driver");
--
2.31.1


2021-09-23 03:34:56

by Bjorn Andersson

[permalink] [raw]
Subject: Re: [RFC PATCH 3/4] rpmsg: Add support of AI Processor Unit (APU)

On Fri 17 Sep 07:59 CDT 2021, Alexandre Bailon wrote:

> Some Mediatek SoC provides hardware accelerator for AI / ML.
> This driver use the DRM driver to manage the shared memory,
> and use rpmsg to execute jobs on the APU.
>
> Signed-off-by: Alexandre Bailon <[email protected]>
> ---
> drivers/rpmsg/Kconfig | 10 +++
> drivers/rpmsg/Makefile | 1 +
> drivers/rpmsg/apu_rpmsg.c | 184 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 195 insertions(+)
> create mode 100644 drivers/rpmsg/apu_rpmsg.c
>
> diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
> index 0b4407abdf138..fc1668f795004 100644
> --- a/drivers/rpmsg/Kconfig
> +++ b/drivers/rpmsg/Kconfig
> @@ -73,4 +73,14 @@ config RPMSG_VIRTIO
> select RPMSG_NS
> select VIRTIO
>
> +config RPMSG_APU
> + tristate "APU RPMSG driver"
> + select REMOTEPROC
> + select RPMSG_VIRTIO
> + select DRM_APU
> + help
> + This provides a RPMSG driver that provides some facilities to
> + communicate with an accelerated processing unit (APU).
> + This Uses the APU DRM driver to manage memory and job scheduling.

Similar to how a driver for e.g. an I2C device doesn't live in
drivers/i2c, this doesn't belong in drivers/rpmsg. Probably rather
directly in the DRM driver.

> +
> endmenu
> diff --git a/drivers/rpmsg/Makefile b/drivers/rpmsg/Makefile
> index 8d452656f0ee3..8b336b9a817c1 100644
> --- a/drivers/rpmsg/Makefile
> +++ b/drivers/rpmsg/Makefile
> @@ -9,3 +9,4 @@ obj-$(CONFIG_RPMSG_QCOM_GLINK_RPM) += qcom_glink_rpm.o
> obj-$(CONFIG_RPMSG_QCOM_GLINK_SMEM) += qcom_glink_smem.o
> obj-$(CONFIG_RPMSG_QCOM_SMD) += qcom_smd.o
> obj-$(CONFIG_RPMSG_VIRTIO) += virtio_rpmsg_bus.o
> +obj-$(CONFIG_RPMSG_APU) += apu_rpmsg.o
> diff --git a/drivers/rpmsg/apu_rpmsg.c b/drivers/rpmsg/apu_rpmsg.c
> new file mode 100644
> index 0000000000000..7e504bd176a4d
> --- /dev/null
> +++ b/drivers/rpmsg/apu_rpmsg.c
> @@ -0,0 +1,184 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Copyright 2020 BayLibre SAS
> +
> +#include <asm/cacheflush.h>
> +
> +#include <linux/cdev.h>
> +#include <linux/dma-buf.h>
> +#include <linux/dma-map-ops.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/iommu.h>
> +#include <linux/iova.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/remoteproc.h>
> +#include <linux/rpmsg.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +#include <drm/apu_drm.h>
> +
> +#include "rpmsg_internal.h"
> +
> +#define APU_RPMSG_SERVICE_MT8183 "rpmsg-mt8183-apu0"
> +
> +struct rpmsg_apu {
> + struct apu_core *core;
> + struct rpmsg_device *rpdev;
> +};
> +
> +static int apu_rpmsg_callback(struct rpmsg_device *rpdev, void *data, int count,
> + void *priv, u32 addr)
> +{
> + struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
> + struct apu_core *apu_core = apu->core;
> +
> + return apu_drm_callback(apu_core, data, count);
> +}
> +
> +static int apu_rpmsg_send(struct apu_core *apu_core, void *data, int len)
> +{
> + struct rpmsg_apu *apu = apu_drm_priv(apu_core);
> + struct rpmsg_device *rpdev = apu->rpdev;
> +
> + return rpmsg_send(rpdev->ept, data, len);

The rpmsg API is exposed outside drivers/rpmsg, so as I said above, just
implement this directly in your driver, no need to lug around a dummy
wrapper for things like this.

> +}
> +
> +static struct apu_drm_ops apu_rpmsg_ops = {
> + .send = apu_rpmsg_send,
> +};
> +
> +static int apu_init_iovad(struct rproc *rproc, struct rpmsg_apu *apu)
> +{
> + struct resource_table *table;
> + struct fw_rsc_carveout *rsc;
> + int i;
> +
> + if (!rproc->table_ptr) {
> + dev_err(&apu->rpdev->dev,
> + "No resource_table: has the firmware been loaded ?\n");
> + return -ENODEV;
> + }
> +
> + table = rproc->table_ptr;
> + for (i = 0; i < table->num; i++) {
> + int offset = table->offset[i];
> + struct fw_rsc_hdr *hdr = (void *)table + offset;
> +
> + if (hdr->type != RSC_CARVEOUT)
> + continue;
> +
> + rsc = (void *)hdr + sizeof(*hdr);
> + if (apu_drm_reserve_iova(apu->core, rsc->da, rsc->len)) {
> + dev_err(&apu->rpdev->dev,
> + "failed to reserve iova\n");
> + return -ENOMEM;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static struct rproc *apu_get_rproc(struct rpmsg_device *rpdev)
> +{
> + /*
> + * To work, the APU RPMsg driver need to get the rproc device.
> + * Currently, we only use virtio so we could use that to find the
> + * remoteproc parent.
> + */
> + if (!rpdev->dev.parent && rpdev->dev.parent->bus) {
> + dev_err(&rpdev->dev, "invalid rpmsg device\n");
> + return ERR_PTR(-EINVAL);
> + }
> +
> + if (strcmp(rpdev->dev.parent->bus->name, "virtio")) {
> + dev_err(&rpdev->dev, "unsupported bus\n");
> + return ERR_PTR(-EINVAL);
> + }
> +
> + return vdev_to_rproc(dev_to_virtio(rpdev->dev.parent));
> +}
> +
> +static int apu_rpmsg_probe(struct rpmsg_device *rpdev)
> +{
> + struct rpmsg_apu *apu;
> + struct rproc *rproc;
> + int ret;
> +
> + apu = devm_kzalloc(&rpdev->dev, sizeof(*apu), GFP_KERNEL);
> + if (!apu)
> + return -ENOMEM;
> + apu->rpdev = rpdev;
> +
> + rproc = apu_get_rproc(rpdev);

I believe that you can replace apu_get_rproc() with:

rproc = rproc_get_by_child(&rpdev->dev);

> + if (IS_ERR_OR_NULL(rproc))
> + return PTR_ERR(rproc);
> +
> + /* Make device dma capable by inheriting from parent's capabilities */
> + set_dma_ops(&rpdev->dev, get_dma_ops(rproc->dev.parent));
> +
> + ret = dma_coerce_mask_and_coherent(&rpdev->dev,
> + dma_get_mask(rproc->dev.parent));
> + if (ret)
> + goto err_put_device;
> +
> + rpdev->dev.iommu_group = rproc->dev.parent->iommu_group;

Would it be better or you if we have a device_node, so that you could
specify the iommus property for this compute device?

I'm asking because I've seen cases where multi-purpose remoteproc
firmware operate using multiple different iommu streams...

> +
> + apu->core = apu_drm_register_core(rproc, &apu_rpmsg_ops, apu);
> + if (!apu->core) {
> + ret = -ENODEV;
> + goto err_put_device;
> + }
> +
> + ret = apu_init_iovad(rproc, apu);
> +
> + dev_set_drvdata(&rpdev->dev, apu);
> +
> + return ret;
> +
> +err_put_device:

This label looks misplaced, and sure enough, if apu_init_iovad() fails
you're not apu_drm_unregister_core().

But on that note, don't you want to apu_init_iovad() before you
apu_drm_register_core()?

> + devm_kfree(&rpdev->dev, apu);

The reason for using devm_kzalloc() is that once you return
unsuccessfully from probe, or from remove the memory is freed.

So devm_kfree() should go in both cases.

> +
> + return ret;
> +}
> +
> +static void apu_rpmsg_remove(struct rpmsg_device *rpdev)
> +{
> + struct rpmsg_apu *apu = dev_get_drvdata(&rpdev->dev);
> +
> + apu_drm_unregister_core(apu);
> + devm_kfree(&rpdev->dev, apu);

No need to explicitly free devm resources.

Regards,
Bjorn

> +}
> +
> +static const struct rpmsg_device_id apu_rpmsg_match[] = {
> + { APU_RPMSG_SERVICE_MT8183 },
> + {}
> +};
> +
> +static struct rpmsg_driver apu_rpmsg_driver = {
> + .probe = apu_rpmsg_probe,
> + .remove = apu_rpmsg_remove,
> + .callback = apu_rpmsg_callback,
> + .id_table = apu_rpmsg_match,
> + .drv = {
> + .name = "apu_rpmsg",
> + },
> +};
> +
> +static int __init apu_rpmsg_init(void)
> +{
> + return register_rpmsg_driver(&apu_rpmsg_driver);
> +}
> +arch_initcall(apu_rpmsg_init);
> +
> +static void __exit apu_rpmsg_exit(void)
> +{
> + unregister_rpmsg_driver(&apu_rpmsg_driver);
> +}
> +module_exit(apu_rpmsg_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("APU RPMSG driver");
> --
> 2.31.1
>