2022-07-22 08:43:17

by Yuji Ishikawa

[permalink] [raw]
Subject: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

Add support to DNN image processing accelerator on Toshiba Visconti ARM SoCs.
The accelerator is applicable to DNN inference tasks.

Signed-off-by: Yuji Ishikawa <[email protected]>
Reviewed-by: Nobuhiro Iwamatsu <[email protected]>
---
v1 -> v2:
- applied checkpatch.pl --strict
- removed unused code
---
drivers/soc/visconti/Kconfig | 6 +
drivers/soc/visconti/Makefile | 2 +
drivers/soc/visconti/dnn/Makefile | 6 +
drivers/soc/visconti/dnn/dnn.c | 523 +++++++++++++++++++++++++
drivers/soc/visconti/dnn/hwd_dnn.c | 183 +++++++++
drivers/soc/visconti/dnn/hwd_dnn.h | 68 ++++
drivers/soc/visconti/dnn/hwd_dnn_reg.h | 228 +++++++++++
drivers/soc/visconti/uapi/dnn.h | 77 ++++
8 files changed, 1093 insertions(+)
create mode 100644 drivers/soc/visconti/dnn/Makefile
create mode 100644 drivers/soc/visconti/dnn/dnn.c
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.c
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.h
create mode 100644 drivers/soc/visconti/dnn/hwd_dnn_reg.h
create mode 100644 drivers/soc/visconti/uapi/dnn.h

diff --git a/drivers/soc/visconti/Kconfig b/drivers/soc/visconti/Kconfig
index 8b1378917..a25287d0c 100644
--- a/drivers/soc/visconti/Kconfig
+++ b/drivers/soc/visconti/Kconfig
@@ -1 +1,7 @@
+if ARCH_VISCONTI
+
+config VISCONTI_DNN
+ bool "Visconti DNN driver"
+
+endif

diff --git a/drivers/soc/visconti/Makefile b/drivers/soc/visconti/Makefile
index 8d710da08..b9bd0f7e2 100644
--- a/drivers/soc/visconti/Makefile
+++ b/drivers/soc/visconti/Makefile
@@ -4,3 +4,5 @@
#

obj-y += ipa_common.o
+
+obj-$(CONFIG_VISCONTI_DNN) += dnn/
diff --git a/drivers/soc/visconti/dnn/Makefile b/drivers/soc/visconti/dnn/Makefile
new file mode 100644
index 000000000..52d57b60d
--- /dev/null
+++ b/drivers/soc/visconti/dnn/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Visconti DNN driver
+#
+
+obj-y += dnn.o hwd_dnn.o
diff --git a/drivers/soc/visconti/dnn/dnn.c b/drivers/soc/visconti/dnn/dnn.c
new file mode 100644
index 000000000..08dcc4e77
--- /dev/null
+++ b/drivers/soc/visconti/dnn/dnn.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
+
+#include "../ipa_common.h"
+#include "../uapi/dnn.h"
+#include "hwd_dnn.h"
+
+struct dnn_priv {
+ struct device *dev;
+ struct miscdevice miscdev;
+ struct mutex lock;
+ void __iomem *regs;
+ int irq;
+ wait_queue_head_t waitq;
+ enum drv_ipa_state status;
+ unsigned int poll_event;
+ int id;
+ char name[16];
+ struct hwd_dnn_status hwd_status;
+
+ u32 *list_vaddr[DRV_DNN_BASE_ADDR_NUM];
+ dma_addr_t list_paddr[DRV_DNN_BASE_ADDR_NUM];
+ unsigned int err_flags[HWD_DNN_EER_FLAG_NUM];
+
+ struct dma_buf_attachment *dba[DRV_DNN_BUFFER_INDEX_MAX];
+ struct sg_table *sgt[DRV_DNN_BUFFER_INDEX_MAX];
+ enum dma_data_direction dma_dir[DRV_DNN_BUFFER_INDEX_MAX];
+ unsigned int dma_count;
+
+ dma_addr_t buffer_iova[DRV_DNN_BUFFER_INDEX_MAX];
+
+ struct drv_ipa_addr temp_list[HWD_DNN_LIST_NUM_MAX];
+};
+
+static uint32_t dnn_ipa_addr_to_iova(struct dnn_priv *priv, struct drv_ipa_addr addr)
+{
+ u32 iova = 0;
+
+ if (addr.buffer_index < priv->dma_count &&
+ addr.offset < priv->dba[addr.buffer_index]->dmabuf->size)
+ iova = priv->buffer_iova[addr.buffer_index] + addr.offset;
+ return iova;
+}
+
+static int dnn_attach_dma_buf(struct dnn_priv *priv, unsigned int buffer_index,
+ struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX])
+{
+ int ret = 0;
+ dma_addr_t addr;
+
+ if (buffer_index >= DRV_DNN_BUFFER_INDEX_MAX) {
+ dev_err(priv->dev, "Buffer index invalid: index=%d\n", buffer_index);
+ return -EINVAL;
+ }
+
+ switch (buffer_info[buffer_index].direction) {
+ case DRV_IPA_DIR_NONE:
+ priv->dma_dir[priv->dma_count] = DMA_NONE;
+ break;
+ case DRV_IPA_DIR_TO_DEVICE:
+ priv->dma_dir[priv->dma_count] = DMA_TO_DEVICE;
+ break;
+ case DRV_IPA_DIR_FROM_DEVICE:
+ priv->dma_dir[priv->dma_count] = DMA_FROM_DEVICE;
+ break;
+ case DRV_IPA_DIR_BIDIRECTION:
+ priv->dma_dir[priv->dma_count] = DMA_BIDIRECTIONAL;
+ break;
+ default:
+ dev_err(priv->dev, "DMA direction invalid: index=%d dir=%d\n", buffer_index,
+ buffer_info[buffer_index].direction);
+ return -EINVAL;
+ }
+
+ ret = ipa_attach_dmabuf(priv->dev, buffer_info[buffer_index].fd,
+ &priv->dba[priv->dma_count], &priv->sgt[priv->dma_count], &addr,
+ priv->dma_dir[priv->dma_count]);
+ if (ret == 0) {
+ priv->dma_count++;
+ priv->buffer_iova[buffer_index] = addr;
+ }
+
+ return ret;
+}
+
+static void dnn_detach_dma_buf(struct dnn_priv *priv)
+{
+ struct dma_buf *dmabuf;
+ int i;
+
+ for (i = 0; i < priv->dma_count; i++) {
+ dmabuf = priv->dba[i]->dmabuf;
+ dma_buf_unmap_attachment(priv->dba[i], priv->sgt[i], priv->dma_dir[i]);
+ dma_buf_detach(dmabuf, priv->dba[i]);
+ dma_buf_put(dmabuf);
+ }
+}
+
+static irqreturn_t dnn_irq(int irq, void *dev_id)
+{
+ struct dnn_priv *priv = dev_id;
+ unsigned int event;
+
+ event = hwd_dnn_irq_handler(priv->id);
+
+ if (event & HWD_DNN_EVENT_EXEC_DONE) {
+ disable_irq_nosync(priv->irq);
+ return IRQ_WAKE_THREAD;
+ }
+
+ return IRQ_NONE;
+}
+
+static irqreturn_t dnn_irq_thread(int irq, void *dev_id)
+{
+ struct dnn_priv *priv = dev_id;
+ unsigned long delay = 1;
+
+ mutex_lock(&priv->lock);
+ dnn_detach_dma_buf(priv);
+
+ hwd_dnn_get_status(priv->id, &priv->hwd_status);
+
+ priv->status = DRV_IPA_STATE_IDLE;
+
+ /* status should be updated before poll_event so that
+ * when poll() returns, user context must observe state as idle
+ */
+ smp_wmb();
+
+ if (priv->hwd_status.eer)
+ priv->poll_event = IPA_POLL_EVENT_ERROR;
+ else
+ priv->poll_event = IPA_POLL_EVENT_DONE;
+
+ /* General barrier to avoid re-ordering of priv->poll_event=N and
+ * waitqueue_active()
+ */
+ smp_mb();
+
+ /* Threads going to sleep in poll() can miss wakeup, when wakeup is done
+ * between event check in ipa_poll() and sleeping. Wakeup repeatedly.
+ */
+ while (waitqueue_active(&priv->waitq)) {
+ wake_up_interruptible(&priv->waitq);
+
+ WARN_ON(delay > IPA_WAKEUP_RETRY_DELAY);
+ usleep_range(delay, delay + 1);
+ delay += delay;
+ }
+
+ mutex_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+}
+
+static void dnn_start(struct dnn_priv *priv, struct hwd_dnn_descriptor *desc)
+{
+ hwd_dnn_start(priv->id, desc);
+}
+
+static int dnn_parse_entry_addr(struct dnn_priv *priv, int idx, struct drv_dnn_descriptor *desc,
+ struct hwd_dnn_descriptor *hwd_desc)
+{
+ dma_addr_t addr;
+ struct drv_ipa_addr ipa_addr;
+ int j, ret = 0;
+
+ switch (desc->base_addr[idx].purpose) {
+ case DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT:
+ case DRV_DNN_BASE_ADDR_PURPOSE_INPUT:
+ if (copy_from_user(priv->temp_list,
+ (void __user *)desc->base_addr[idx].addr.list_addr,
+ sizeof(struct drv_ipa_addr) * (desc->list_num + 1))) {
+ dev_err(priv->dev, "IPA address to iova conversion error: %d\n", __LINE__);
+ ret = -EFAULT;
+ break;
+ }
+
+ hwd_desc->base_addr[idx] = priv->list_paddr[idx];
+ for (j = 0; j < desc->list_num + 1; j++) {
+ ipa_addr = priv->temp_list[j];
+ addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+ if (addr == 0) {
+ dev_err(priv->dev,
+ "@LIST@ IPA address to iova conversion error: %d\n",
+ __LINE__);
+ ret = -EINVAL;
+ break;
+ }
+ dev_dbg(priv->dev, "@LIST@ %d: fd=%d %llx %x\n", idx,
+ desc->buffer_info[ipa_addr.buffer_index].fd,
+ (u64)hwd_desc->base_addr[idx], (u32)addr);
+ priv->list_vaddr[idx][j] = addr;
+ }
+ break;
+ case DRV_DNN_BASE_ADDR_PURPOSE_AWB:
+ ipa_addr = desc->base_addr[idx].addr.ipa_addr;
+ addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+ if (addr == 0) {
+ dev_err(priv->dev, "@AWB@ IPA address to iova conversion error: %d\n",
+ __LINE__);
+ ret = -EINVAL;
+ break;
+ }
+ hwd_desc->base_addr[idx] = addr;
+ dev_dbg(priv->dev, "@AWB@ %d: fd=%d %llx\n", idx,
+ desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]);
+ break;
+ case DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY:
+ ipa_addr = desc->base_addr[idx].addr.ipa_addr;
+ addr = dnn_ipa_addr_to_iova(priv, ipa_addr);
+ if (addr == 0) {
+ dev_err(priv->dev, "@TEMP@ IPA address to iova conversion error: %d\n",
+ __LINE__);
+ ret = -EINVAL;
+ break;
+ }
+ hwd_desc->base_addr[idx] = addr;
+ dev_dbg(priv->dev, "@TEMP@ %d: fd=%d %llx\n", idx,
+ desc->buffer_info[ipa_addr.buffer_index].fd, (u64)hwd_desc->base_addr[idx]);
+ break;
+ default:
+ hwd_desc->base_addr[idx] = 0;
+ break;
+ }
+ return ret;
+}
+
+static int dnn_ioctl_start(struct dnn_priv *priv, unsigned long arg)
+{
+ struct drv_dnn_descriptor desc;
+ struct hwd_dnn_descriptor hwd_desc;
+ int i, ret = 0;
+
+ ret = mutex_lock_interruptible(&priv->lock);
+ if (ret)
+ return ret;
+
+ if (priv->status == DRV_IPA_STATE_BUSY) {
+ dev_dbg(priv->dev, "busy: %d\n", priv->status);
+ mutex_unlock(&priv->lock);
+ return -EBUSY;
+ }
+
+ if (copy_from_user(&desc, (void __user *)arg, sizeof(struct drv_dnn_descriptor))) {
+ dev_err(priv->dev, "Descriptor memory access error\n");
+ ret = -EFAULT;
+ goto err1;
+ }
+
+ if (DRV_DNN_BIT_CONFIG_DESC_FINAL != (desc.config_done & DRV_DNN_BIT_CONFIG_DESC_FINAL)) {
+ dev_err(priv->dev, "Descriptor configuration not complete\n");
+ ret = -EINVAL;
+ goto err1;
+ }
+
+ priv->dma_count = 0;
+
+ /* setup buffer */
+ for (i = 0; i < desc.buffer_info_num; i++) {
+ ret = dnn_attach_dma_buf(priv, i, desc.buffer_info);
+ if (ret) {
+ dev_err(priv->dev, "dma buf attach error: index=%d\n", i);
+ goto err2;
+ }
+ dev_dbg(priv->dev, "@buffer[%d]@: fd=%d %s iova=%llx\n", i, desc.buffer_info[i].fd,
+ desc.buffer_info[i].coherent ? "coherent" : "non-coherent",
+ (uint64_t)priv->buffer_iova[i]);
+ }
+
+ /* get iova address of configuration area */
+ hwd_desc.configuration = dnn_ipa_addr_to_iova(priv, desc.configuration);
+ if (hwd_desc.configuration == 0) {
+ dev_err(priv->dev, "Invalid IPA Address: configuration %s: %d\n", __func__,
+ __LINE__);
+ ret = -EINVAL;
+ goto err2;
+ }
+
+ dev_dbg(priv->dev, "@config@: fd=%d %llx\n",
+ desc.buffer_info[desc.configuration.buffer_index].fd,
+ (uint64_t)hwd_desc.configuration);
+
+ for (i = 0; i < DRV_DNN_BASE_ADDR_NUM; i++) {
+ ret = dnn_parse_entry_addr(priv, i, &desc, &hwd_desc);
+ if (ret)
+ goto err2;
+ }
+
+ hwd_desc.configuration_size = desc.configuration_size;
+ hwd_desc.list_num = desc.list_num;
+ hwd_desc.eer_flags_addr = priv->err_flags;
+ hwd_desc.base_addr_flag = desc.base_addr_flag;
+ hwd_desc.config_done = desc.config_done;
+
+ hwd_desc.configuration += desc.configuration_offset;
+
+ dnn_start(priv, &hwd_desc);
+
+ priv->poll_event = IPA_POLL_EVENT_NONE;
+ priv->status = DRV_IPA_STATE_BUSY;
+ enable_irq(priv->irq);
+
+ mutex_unlock(&priv->lock);
+
+ return ret;
+
+err2:
+ dnn_detach_dma_buf(priv);
+err1:
+ mutex_unlock(&priv->lock);
+ return ret;
+}
+
+static int dnn_ioctl_get_status(struct dnn_priv *priv, unsigned long arg)
+{
+ struct drv_dnn_status status;
+ struct hwd_dnn_status hwd_status;
+ int ret = 0, i;
+
+ ret = mutex_lock_interruptible(&priv->lock);
+ if (ret)
+ return ret;
+
+ if (priv->status == DRV_IPA_STATE_BUSY)
+ hwd_dnn_get_status(priv->id, &hwd_status);
+ else
+ hwd_status = priv->hwd_status;
+
+ status.state = priv->status;
+ mutex_unlock(&priv->lock);
+
+ status.eer_cmd = hwd_status.eer_cmd;
+ status.eer = hwd_status.eer;
+ for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++)
+ status.eer_flags[i] = priv->err_flags[i];
+
+ if (copy_to_user((void __user *)arg, &status, sizeof(struct drv_dnn_status))) {
+ dev_err(priv->dev, "status memory access error\n");
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+static long dnn_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+ struct dnn_priv *priv = container_of(fp->private_data, struct dnn_priv, miscdev);
+ int ret = 0;
+
+ switch (cmd) {
+ case IOC_IPA_START:
+ ret = dnn_ioctl_start(priv, arg);
+ break;
+ case IOC_IPA_GET_STATUS:
+ ret = dnn_ioctl_get_status(priv, arg);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+
+ return ret;
+}
+
+static __poll_t dnn_poll(struct file *fp, poll_table *wait)
+{
+ struct dnn_priv *priv = container_of(fp->private_data, struct dnn_priv, miscdev);
+ __poll_t mask = 0;
+ unsigned int poll_event;
+
+ poll_wait(fp, &priv->waitq, wait);
+
+ /* Barrier to avoid re-ordering of poll_wait() and event load
+ * Read barrier here and release barrier in poll_wait() together will
+ * prevent re-ordering
+ */
+ smp_rmb();
+ poll_event = priv->poll_event;
+ if (poll_event != IPA_POLL_EVENT_NONE) {
+ mask = EPOLLIN | EPOLLRDNORM;
+ if (poll_event == IPA_POLL_EVENT_ERROR)
+ mask |= EPOLLERR;
+ }
+ return mask;
+}
+
+static const struct file_operations dnn_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = dnn_ioctl,
+ .poll = dnn_poll,
+};
+
+static int dnn_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct dnn_priv *priv;
+ int i, ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ mutex_init(&priv->lock);
+
+ /* update DMA mask */
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
+ if (ret)
+ return ret;
+
+ priv->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->regs))
+ return PTR_ERR(priv->regs);
+
+ priv->irq = platform_get_irq(pdev, 0);
+ if (priv->irq < 0)
+ return priv->irq;
+
+ ret = devm_request_threaded_irq(dev, priv->irq, dnn_irq, dnn_irq_thread, 0, "dnn", priv);
+ if (ret) {
+ dev_err(dev, "irq request failed\n");
+ return ret;
+ }
+ disable_irq(priv->irq);
+
+ ret = of_property_read_u32(dev->of_node, "index", &priv->id);
+ if (ret) {
+ dev_err(dev, "failed to acquire irq resource\n");
+ return ret;
+ }
+
+ /*
+ * allocate uncached-memory to hold address-list for DNN operation
+ * to hold ptr to 32bit addr memory block
+ * for each base address (upto HWD_DNN_BASE_ADDR_NUM)
+ * for each iteration (up to HWD_DNN_LIST_NUM_MAX)
+ * uint32_t list_addr [HWD_DNN_BASE_ADDR_NUM] [HWD_DNN_LIST_NUM_MAX];
+ */
+ priv->list_vaddr[0] =
+ dma_alloc_wc(dev, sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM,
+ &priv->list_paddr[0], GFP_KERNEL);
+ if (!priv->list_vaddr[0]) {
+ dev_err(dev, "dma_alloc_wc failed\n");
+ return -ENOMEM;
+ }
+ for (i = 1; i < HWD_DNN_BASE_ADDR_NUM; i++) {
+ priv->list_vaddr[i] = priv->list_vaddr[0] + HWD_DNN_LIST_NUM_MAX * i;
+ priv->list_paddr[i] = priv->list_paddr[0] + sizeof(u32) * HWD_DNN_LIST_NUM_MAX * i;
+ }
+
+ hwd_dnn_initialize(priv->id, priv->regs);
+
+ snprintf(priv->name, sizeof(priv->name), "dnn%d", priv->id);
+ priv->miscdev.minor = MISC_DYNAMIC_MINOR;
+ priv->miscdev.name = priv->name;
+ priv->miscdev.fops = &dnn_fops;
+ ret = misc_register(&priv->miscdev);
+ if (ret) {
+ dev_err(dev, "misc registration failed\n");
+ hwd_dnn_uninitialize(priv->id);
+ dma_free_wc(dev, sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM,
+ priv->list_vaddr[0], priv->list_paddr[0]);
+ return ret;
+ }
+
+ priv->dev = dev;
+ platform_set_drvdata(pdev, priv);
+
+ init_waitqueue_head(&priv->waitq);
+
+ priv->status = DRV_IPA_STATE_IDLE;
+ return 0;
+}
+
+static int dnn_remove(struct platform_device *pdev)
+{
+ struct dnn_priv *priv = platform_get_drvdata(pdev);
+
+ misc_deregister(&priv->miscdev);
+ hwd_dnn_uninitialize(priv->id);
+ dma_free_wc(&pdev->dev, sizeof(u32) * HWD_DNN_LIST_NUM_MAX * HWD_DNN_BASE_ADDR_NUM,
+ priv->list_vaddr[0], priv->list_paddr[0]);
+ return 0;
+}
+
+static const struct of_device_id dnn_of_match[] = {
+ {
+ .compatible = "toshiba,visconti-dnn",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(platform, dnn_of_match);
+
+static struct platform_driver dnn_driver = {
+ .probe = dnn_probe,
+ .remove = dnn_remove,
+ .driver = {
+ .name = "visconti_dnn",
+ .of_match_table = of_match_ptr(dnn_of_match),
+ },
+};
+module_platform_driver(dnn_driver);
+
+MODULE_AUTHOR("Yuji Ishikawa <[email protected]>");
+MODULE_DESCRIPTION("Toshiba Visconti DNN driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/soc/visconti/dnn/hwd_dnn.c b/drivers/soc/visconti/dnn/hwd_dnn.c
new file mode 100644
index 000000000..832040329
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+#include "hwd_dnn.h"
+#include "hwd_dnn_reg.h"
+
+#define HWD_DNN_INT_END BIT(0)
+#define HWD_DNN_INT_EER BIT(24)
+#define HWD_DNN_INT_UNUSED (0x003f0100U)
+#define HWD_DNN_INT_MASK_CONFIG (HWD_DNN_INT_UNUSED | HWD_DNN_INT_EER)
+#define HWD_DNN_INT_CLEAR_ALL (HWD_DNN_INT_END | HWD_DNN_INT_EER)
+#define HWD_DNN_EXE_CONFIG_MODE (0x3U)
+
+#define HWD_DNN_CGATE_ORD_CONFIG BIT(17)
+
+/**
+ * struct hwd_dnn_resources - HWD driver internal resource structure
+ *
+ * @reg: control register
+ * @status: driver status
+ * @eer_flags_addr: Address of execution error flags
+ */
+struct hwd_dnn_resources {
+ struct hwd_dnn_reg *reg;
+ struct hwd_dnn_status status;
+ u32 *eer_flags_addr;
+};
+
+/* HWD driver internal resource */
+static struct hwd_dnn_resources hwd_dnn_resources[HWD_DNN_DEVICE_MAX] = {};
+
+/**
+ * hwd_dnn_initialize() - Initialize DNN device
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @vaddr: register base virtual address
+ * Return: 0 operation completed successfully
+ */
+void hwd_dnn_initialize(u32 module_id, void *vaddr)
+{
+ struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+
+ /* Initialize the device */
+ res->reg = (struct hwd_dnn_reg *)vaddr;
+}
+
+/**
+ * hwd_dnn_uninitialize() - Uninitialize DNN device
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ */
+void hwd_dnn_uninitialize(u32 module_id)
+{
+ struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+
+ res->reg = NULL;
+}
+
+/**
+ * hwd_dnn_start() - Start DNN device
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @desc: Pointer to descriptor structure
+ */
+void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc)
+{
+ struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+ struct hwd_dnn_reg *reg;
+ u32 base_addr_flag;
+ int i;
+
+ reg = res->reg;
+
+ /* Configure the registers with user provided values */
+ writel(desc->configuration & GENMASK(31, 0), &reg->cfg_addr);
+ writel(desc->configuration_size, &reg->cfg_size);
+ writel(desc->list_num, &reg->cfg_rpt);
+
+ base_addr_flag = desc->base_addr_flag;
+ for (i = 0; i < HWD_DNN_BASE_ADDR_NUM; i++) {
+ if (base_addr_flag & BIT(i)) {
+ /* 1: address list is specified */
+ writel(0U, &reg->base_addr[i]);
+ writel(desc->base_addr[i] & GENMASK(31, 0), &reg->base_list[i]);
+ } else {
+ /* 0: region address is specified */
+ writel(desc->base_addr[i] & GENMASK(31, 0), &reg->base_addr[i]);
+ writel(0U, &reg->base_list[i]);
+ }
+ }
+
+ writel(base_addr_flag, &reg->base_list_cfg);
+
+ /* clear and unmask interrupts */
+ writel(HWD_DNN_INT_CLEAR_ALL, &reg->intstat);
+ writel(HWD_DNN_INT_MASK_CONFIG, &reg->intmask);
+
+ res->status.eer_cmd = 0;
+ res->status.eer = 0;
+ res->eer_flags_addr = desc->eer_flags_addr;
+
+ /*Invalidate load command in Clock Gating Control Register */
+ writel(HWD_DNN_CGATE_ORD_CONFIG, &reg->cgate_ord);
+
+ /* Sync memory barrier */
+ dsb(st);
+
+ /* Start the device */
+ writel(HWD_DNN_EXE_CONFIG_MODE, &reg->exe);
+}
+
+/**
+ * hwd_dnn_irq_handler() - HWD DNN interrupt handler
+ *
+ * @module_id: id of the h/w module
+ * Return: HWD_DNN_EVENT_EXEC_DONE Bit field saying Configuration processing is completed
+ */
+u32 hwd_dnn_irq_handler(u32 module_id)
+{
+ struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+ struct hwd_dnn_reg *reg;
+ u32 event = 0;
+ u32 int_stat;
+ u32 cause;
+ int i;
+
+ reg = res->reg;
+
+ /* Read the interrupt causes */
+ cause = readl(&reg->mask_intstat);
+
+ /* Read the interrupt status register */
+ int_stat = readl(&reg->intstat);
+
+ /*
+ * Read the error command.
+ * Clearing DNN_INTSTAT.EER interrupt cause initializes the error command register.
+ * Read error command before clearing the interrupt cause.
+ */
+ res->status.eer_cmd = readl(&reg->int_eer_cmd);
+
+ /* Clear the interrupt causes */
+ writel(cause, &reg->intstat);
+
+ if (cause & HWD_DNN_INT_END) {
+ /* Get EER flags, if requestd */
+ if (res->eer_flags_addr)
+ for (i = 0; i < HWD_DNN_EER_FLAG_NUM; i++)
+ res->eer_flags_addr[i] = readl(&reg->eer_flag[i]);
+
+ event = HWD_DNN_EVENT_EXEC_DONE;
+ }
+
+ if (int_stat & HWD_DNN_INT_EER) {
+ /* Operation error occurred */
+ res->status.eer = 1;
+ }
+
+ return event;
+}
+
+/**
+ * hwd_dnn_get_status() - HWD DNN Get Status
+ *
+ * @module_id: @ref hwd_dnn_device "id" of the h/w module
+ * @status: Pointer to status structure
+ */
+void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status)
+{
+ const struct hwd_dnn_resources *res = &hwd_dnn_resources[module_id];
+
+ /* Update device status */
+ status->eer_cmd = res->status.eer_cmd;
+ status->eer = res->status.eer;
+}
diff --git a/drivers/soc/visconti/dnn/hwd_dnn.h b/drivers/soc/visconti/dnn/hwd_dnn.h
new file mode 100644
index 000000000..84425990c
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef HWD_DNN_H
+#define HWD_DNN_H
+
+/**
+ * enum hwd_dnn_device_id - device ids for available DNN Accelerators
+ */
+enum hwd_dnn_device_id { HWD_DNN_DEVICE_0 = 0, HWD_DNN_DEVICE_1 = 1, HWD_DNN_DEVICE_MAX = 2 };
+
+/* DNN events returned by irq handler */
+#define HWD_DNN_EVENT_EXEC_DONE (1U)
+
+/* number of base address available for DNN */
+#define HWD_DNN_BASE_ADDR_NUM (8U)
+
+/* number of error flag registers */
+#define HWD_DNN_EER_FLAG_NUM (32U)
+
+/* DNN hardware can execute 1024 iterations for different dataset at a time */
+#define HWD_DNN_LIST_NUM_MAX (1024U)
+
+/**
+ * struct hwd_dnn_status - DNN HWD Driver Status
+ *
+ * @eer_cmd: Error Command
+ * @eer: Execution Error
+ * @reserved: Padding
+ */
+struct hwd_dnn_status {
+ u32 eer_cmd;
+ u32 eer : 1;
+ u32 reserved : 31;
+};
+
+/**
+ * struct hwd_dnn_descriptor - HWD DNN Descriptor
+ *
+ * @configuration: Configuration data
+ * @configuration_size: Configuration data size
+ * @list_num: Number of input/output list
+ * @base_addr: Base Address
+ * @eer_flags_addr: Address of storing execution error flags
+ * @base_addr_flag: Bit-fields of Base Address list config. If 1, address list. If 0, fixed address
+ * @config_done: Flags of called configuration
+ */
+struct hwd_dnn_descriptor {
+ u32 configuration;
+ u32 configuration_size;
+ u32 list_num;
+ u32 base_addr[HWD_DNN_BASE_ADDR_NUM];
+ u32 *eer_flags_addr;
+ u32 base_addr_flag;
+ u16 config_done;
+};
+
+void hwd_dnn_initialize(u32 module_id, void *vaddr);
+void hwd_dnn_uninitialize(u32 module_id);
+void hwd_dnn_start(u32 module_id, const struct hwd_dnn_descriptor *desc);
+u32 hwd_dnn_irq_handler(u32 module_id);
+void hwd_dnn_get_status(u32 module_id, struct hwd_dnn_status *status);
+
+#endif /* HWD_DNN_H */
diff --git a/drivers/soc/visconti/dnn/hwd_dnn_reg.h b/drivers/soc/visconti/dnn/hwd_dnn_reg.h
new file mode 100644
index 000000000..c1271e092
--- /dev/null
+++ b/drivers/soc/visconti/dnn/hwd_dnn_reg.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef HWD_DNN_REG_H
+#define HWD_DNN_REG_H
+
+/**
+ * struct hwd_dnn_reg - DNN register address struct
+ */
+struct hwd_dnn_reg {
+ u32 version;
+ u32 exe;
+ u32 reserved_a_1;
+ u32 intstat;
+ u32 intmask;
+ u32 mask_intstat;
+ u32 reserved_a_2;
+ u32 int_eer_cmd;
+ u32 reserved_b_1[8];
+ u32 cfg_cid;
+ u32 cfg_cnm;
+ u32 reserved_b_2[10];
+ u32 reserved_a_3;
+ u32 cgate_ord;
+ u32 reserved_b_3[2];
+ u32 eer_flag[32];
+ u32 cfg_addr;
+ u32 cfg_size;
+ u32 cfg_rpt;
+ u32 reserved_b_4[1];
+ u32 base_list_cfg;
+ u32 reserved_b_5[11];
+ u32 base_addr[8];
+ u32 reserved_b_6[24];
+ u32 base_list[8];
+ u32 reserved_b_7[8];
+ u32 cmd_load;
+ u32 ld_format;
+ u32 reserved_b_8[14];
+ u32 ld_g_size0;
+ u32 ld_g_size1;
+ u32 reserved_b_9[2];
+ u32 reserved_a_4;
+ u32 reserved_b_10[1];
+ u32 ld_g_base_id;
+ u32 ld_g_offset;
+ u32 ld_g_lptch;
+ u32 ld_g_cptch;
+ u32 ld_g_vptch;
+ u32 reserved_b_11[5];
+ u32 ld_l_size0;
+ u32 ld_l_size1;
+ u32 reserved_b_12[2];
+ u32 ld_l_addr;
+ u32 reserved_b_13[3];
+ u32 ld_l_lptch;
+ u32 ld_l_cptch;
+ u32 ld_l_vptch;
+ u32 reserved_b_14[17];
+ u32 reserved_a_5;
+ u32 reserved_b_15[3];
+ u32 cmd_store;
+ u32 st_format;
+ u32 reserved_b_16[14];
+ u32 st_g_size0;
+ u32 st_g_size1;
+ u32 reserved_b_17[2];
+ u32 reserved_a_6;
+ u32 reserved_b_18[1];
+ u32 st_g_base_id;
+ u32 st_g_offset;
+ u32 st_g_lptch;
+ u32 st_g_cptch;
+ u32 st_g_vptch;
+ u32 reserved_b_19[5];
+ u32 st_l_size0;
+ u32 st_l_size1;
+ u32 reserved_b_20[2];
+ u32 st_l_addr;
+ u32 reserved_b_21[3];
+ u32 st_l_lptch;
+ u32 st_l_cptch;
+ u32 st_l_vptch;
+ u32 reserved_b_22[17];
+ u32 reserved_a_7;
+ u32 reserved_b_23[3];
+ u32 cmd_copy;
+ u32 reserved_b_24[15];
+ u32 cp_src_size0;
+ u32 cp_src_size1;
+ u32 reserved_b_25[2];
+ u32 cp_src_addr;
+ u32 reserved_b_26[3];
+ u32 cp_src_lptch;
+ u32 cp_src_cptch;
+ u32 reserved_a_8;
+ u32 reserved_b_27[5];
+ u32 cp_dst_size0;
+ u32 cp_dst_size1;
+ u32 reserved_b_28[2];
+ u32 cp_dst_addr;
+ u32 reserved_b_29[3];
+ u32 cp_dst_lptch;
+ u32 cp_dst_cptch;
+ u32 reserved_a_9;
+ u32 reserved_b_30[17];
+ u32 reserved_a_10;
+ u32 reserved_b_31[3];
+ u32 cmd_fill;
+ u32 reserved_b_32[1];
+ u32 fl_cfg;
+ u32 reserved_b_33[13];
+ u32 fl_src_size0;
+ u32 fl_src_size1;
+ u32 reserved_b_34[2];
+ u32 fl_src_addr;
+ u32 reserved_b_35[3];
+ u32 fl_src_lptch;
+ u32 fl_src_cptch;
+ u32 reserved_b_36[6];
+ u32 fl_dst_size0;
+ u32 fl_dst_size1;
+ u32 reserved_b_37[2];
+ u32 fl_dst_addr;
+ u32 reserved_b_38[3];
+ u32 fl_dst_lptch;
+ u32 fl_dst_cptch;
+ u32 reserved_b_39[22];
+ u32 cmd_sync;
+ u32 reserved_b_40[1];
+ u32 cmd_srst;
+ u32 reserved_b_41[1];
+ u32 cmd_halt;
+ u32 reserved_b_42[3];
+ u32 cmd_finish;
+ u32 reserved_b_43[119];
+ u32 cmd_exe;
+ u32 exe_mode;
+ u32 reserved_b_44[14];
+ u32 exe_inf_size0;
+ u32 exe_inf_size1;
+ u32 reserved_b_45[2];
+ u32 exe_inf_addr;
+ u32 reserved_b_46[3];
+ u32 exe_inf_lptch;
+ u32 exe_inf_cptch;
+ u32 reserved_b_47[26];
+ u32 exe_conv_bias_addr;
+ u32 reserved_b_48[27];
+ u32 exe_conv_wt_size0;
+ u32 exe_conv_wt_size1;
+ u32 reserved_b_49[2];
+ u32 exe_conv_wt_addr;
+ u32 reserved_b_50[3];
+ u32 exe_conv_wt_lptch;
+ u32 exe_conv_wt_cptch;
+ u32 exe_conv_wt_vptch;
+ u32 reserved_b_51[9];
+ u32 exe_conv_acv_addr;
+ u32 reserved_b_52[3];
+ u32 exe_conv_acv_lptch;
+ u32 exe_conv_acv_cptch;
+ u32 reserved_b_53[18];
+ u32 exe_conv_out;
+ u32 reserved_b_54[11];
+ u32 actv_tbl_range;
+ u32 reserved_a_11;
+ u32 actv_tbl_prm;
+ u32 reserved_b_55[5];
+ u32 exe_pool_size;
+ u32 reserved_b_56[43];
+ u32 exe_pool_out;
+ u32 exe_pool_pe_crop;
+ u32 reserved_b_57[10];
+ u32 exe_norm_g_prm;
+ u32 reserved_b_58[15];
+ u32 exe_sav_max_prm;
+ u32 reserved_b_59[35];
+ u32 exe_norm_out;
+ u32 reserved_b_60[23];
+ u32 exe_outf_addr;
+ u32 reserved_b_61[3];
+ u32 exe_outf_lptch;
+ u32 exe_outf_cptch;
+ u32 reserved_b_62[102];
+ u32 reserved_a_12;
+ u32 reserved_b_63[8];
+ u32 reserved_a_13;
+ u32 reserved_a_14;
+ u32 reserved_a_15;
+ u32 reserved_b_64[4];
+ u32 reserved_a_16;
+ u32 reserved_a_17;
+ u32 reserved_b_65[46];
+ u32 exe_pool_pa3_0;
+ u32 exe_pool_pa3_2;
+ u32 exe_pool_pa3_4;
+ u32 exe_pool_pa3_6;
+ u32 exe_pool_pa3_8;
+ u32 exe_pool_pa3_10;
+ u32 exe_pool_pa3_12;
+ u32 exe_pool_pa3_14;
+ u32 exe_pool_pa3_16;
+ u32 exe_pool_pa3_18;
+ u32 exe_pool_pa3_20;
+ u32 exe_pool_pa3_22;
+ u32 exe_pool_pa3_24;
+ u32 reserved_b_66[3];
+ u32 exe_pool_pa2_0;
+ u32 exe_pool_pa2_2;
+ u32 exe_pool_pa2_4;
+ u32 exe_pool_pa2_6;
+ u32 exe_pool_pa2_8;
+ u32 exe_pool_pa2_10;
+ u32 exe_pool_pa2_12;
+ u32 exe_pool_pa2_14;
+ u32 exe_pool_pa2_16;
+ u32 exe_pool_pa2_18;
+ u32 exe_pool_pa2_20;
+ u32 exe_pool_pa2_22;
+ u32 exe_pool_pa2_24;
+};
+
+#endif /* HWD_DNN_REG_H */
diff --git a/drivers/soc/visconti/uapi/dnn.h b/drivers/soc/visconti/uapi/dnn.h
new file mode 100644
index 000000000..322c6b13d
--- /dev/null
+++ b/drivers/soc/visconti/uapi/dnn.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Toshiba Visconti DNN Accelerator Support
+ *
+ * (C) Copyright 2022 TOSHIBA CORPORATION
+ * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
+ */
+
+#ifndef _UAPI_LINUX_DNN_H
+#define _UAPI_LINUX_DNN_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include "ipa.h"
+
+#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U)
+#define DRV_DNN_BUFFER_INDEX_MAX (15)
+
+#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base address */
+
+#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U)
+#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U)
+
+/**
+ * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS
+ *
+ * @state: State of driver
+ * @eer_cmd: Execution error command
+ * @eer: Execution error
+ * @reserved: Padding
+ * @eer_flags: Execution error flags
+ */
+struct drv_dnn_status {
+ enum drv_ipa_state state;
+ __u32 eer_cmd;
+ __u32 eer : 1;
+ __u32 reserved : 31;
+ __u32 eer_flags[32];
+};
+
+struct drv_dnn_base_addr {
+ __u32 purpose;
+ union {
+ struct drv_ipa_addr ipa_addr;
+ uintptr_t list_addr;
+ } addr;
+};
+
+/**
+ * struct drv_dnn_descriptor - DNN IPA Descriptor for IOC_IPA_START
+ *
+ * @configuration: Address of configuration data
+ * @configuration_offset: Configuration offset
+ * @configuration_size: Configuration data size
+ * @list_num: Number of input/output list
+ * @base_addr: Base addresses
+ * @base_addr_flag: Bit-fields of base_addr list config;
+ * 0 for fixed address,
+ * 1 for address list.
+ * @config_done: Flags of called configuration
+ * @buffer_info: Table of buffer information
+ * @buffer_info_num: Number of buffer_info
+ */
+struct drv_dnn_descriptor {
+ struct drv_ipa_addr configuration;
+ __u32 configuration_offset;
+ __u32 configuration_size;
+ __u32 list_num;
+ struct drv_dnn_base_addr base_addr[DRV_DNN_BASE_ADDR_NUM];
+ __u32 base_addr_flag;
+ __u16 config_done;
+ struct drv_ipa_buffer_info buffer_info[DRV_DNN_BUFFER_INDEX_MAX];
+ __s32 buffer_info_num;
+};
+
+#endif /* _UAPI_LINUX_DNN_H */
--
2.17.1



2022-07-25 13:06:13

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

On Fri, Jul 22, 2022 at 05:28:56PM +0900, Yuji Ishikawa wrote:
> --- /dev/null
> +++ b/drivers/soc/visconti/uapi/dnn.h
> @@ -0,0 +1,77 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/* Toshiba Visconti DNN Accelerator Support
> + *
> + * (C) Copyright 2022 TOSHIBA CORPORATION
> + * (C) Copyright 2022 Toshiba Electronic Devices & Storage Corporation
> + */
> +
> +#ifndef _UAPI_LINUX_DNN_H
> +#define _UAPI_LINUX_DNN_H
> +
> +#include <linux/ioctl.h>
> +#include <linux/types.h>
> +#include "ipa.h"
> +
> +#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U)
> +#define DRV_DNN_BUFFER_INDEX_MAX (15)
> +
> +#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base address */
> +
> +#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U)
> +#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U)
> +#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U)
> +#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U)
> +
> +/**
> + * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS
> + *
> + * @state: State of driver
> + * @eer_cmd: Execution error command
> + * @eer: Execution error
> + * @reserved: Padding
> + * @eer_flags: Execution error flags
> + */
> +struct drv_dnn_status {
> + enum drv_ipa_state state;
> + __u32 eer_cmd;
> + __u32 eer : 1;
> + __u32 reserved : 31;

bitfields will not work like this for uapi files, sorry.

> + __u32 eer_flags[32];

What endian is all of these? Big? Little? Unknown?

> +};
> +
> +struct drv_dnn_base_addr {
> + __u32 purpose;
> + union {
> + struct drv_ipa_addr ipa_addr;
> + uintptr_t list_addr;

You really do not ever want a uintptr_t in a uapi file, that's not going
to be portable at all. It's also not a valid kernel type :(

No documentation on what "purpose" is for?

> + } addr;
> +};
> +
> +/**
> + * struct drv_dnn_descriptor - DNN IPA Descriptor for IOC_IPA_START
> + *
> + * @configuration: Address of configuration data
> + * @configuration_offset: Configuration offset
> + * @configuration_size: Configuration data size
> + * @list_num: Number of input/output list
> + * @base_addr: Base addresses
> + * @base_addr_flag: Bit-fields of base_addr list config;
> + * 0 for fixed address,
> + * 1 for address list.

Where are the bitfield definitions?

What about unused bits, what happens with them? You are checking them,
right?

> + * @config_done: Flags of called configuration
> + * @buffer_info: Table of buffer information
> + * @buffer_info_num: Number of buffer_info
> + */
> +struct drv_dnn_descriptor {
> + struct drv_ipa_addr configuration;
> + __u32 configuration_offset;

What endian are any of these?

thanks,

greg k-h

2022-07-25 13:33:15

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

On Fri, Jul 22, 2022 at 05:28:56PM +0900, Yuji Ishikawa wrote:
> Add support to DNN image processing accelerator on Toshiba Visconti ARM SoCs.
> The accelerator is applicable to DNN inference tasks.
>
> Signed-off-by: Yuji Ishikawa <[email protected]>
> Reviewed-by: Nobuhiro Iwamatsu <[email protected]>
> ---
> v1 -> v2:
> - applied checkpatch.pl --strict
> - removed unused code
> ---
> drivers/soc/visconti/Kconfig | 6 +
> drivers/soc/visconti/Makefile | 2 +
> drivers/soc/visconti/dnn/Makefile | 6 +
> drivers/soc/visconti/dnn/dnn.c | 523 +++++++++++++++++++++++++
> drivers/soc/visconti/dnn/hwd_dnn.c | 183 +++++++++
> drivers/soc/visconti/dnn/hwd_dnn.h | 68 ++++
> drivers/soc/visconti/dnn/hwd_dnn_reg.h | 228 +++++++++++
> drivers/soc/visconti/uapi/dnn.h | 77 ++++
> 8 files changed, 1093 insertions(+)
> create mode 100644 drivers/soc/visconti/dnn/Makefile
> create mode 100644 drivers/soc/visconti/dnn/dnn.c
> create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.c
> create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.h
> create mode 100644 drivers/soc/visconti/dnn/hwd_dnn_reg.h
> create mode 100644 drivers/soc/visconti/uapi/dnn.h
>
> diff --git a/drivers/soc/visconti/Kconfig b/drivers/soc/visconti/Kconfig
> index 8b1378917..a25287d0c 100644
> --- a/drivers/soc/visconti/Kconfig
> +++ b/drivers/soc/visconti/Kconfig
> @@ -1 +1,7 @@
> +if ARCH_VISCONTI
> +
> +config VISCONTI_DNN
> + bool "Visconti DNN driver"
> +

We can't take Kconfig options with no help text at all, please provide
the needed information here.

And why can't this be a module?

2022-07-26 06:22:15

by Yuji Ishikawa

[permalink] [raw]
Subject: RE: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

Hi Greg

Thank you for your comments.

> -----Original Message-----
> From: Greg KH <[email protected]>
> Sent: Monday, July 25, 2022 9:51 PM
> To: ishikawa yuji(石川 悠司 ○RDC□AITC○EA開)
> <[email protected]>
> Cc: Rob Herring <[email protected]>; Hans Verkuil <[email protected]>;
> iwamatsu nobuhiro(岩松 信洋 □SWC◯ACT)
> <[email protected]>; Jonathan Corbet <[email protected]>;
> Sumit Semwal <[email protected]>; Christian König
> <[email protected]>; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image
> processing accelerator
>
> On Fri, Jul 22, 2022 at 05:28:56PM +0900, Yuji Ishikawa wrote:
> > --- /dev/null
> > +++ b/drivers/soc/visconti/uapi/dnn.h
> > @@ -0,0 +1,77 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +/* Toshiba Visconti DNN Accelerator Support
> > + *
> > + * (C) Copyright 2022 TOSHIBA CORPORATION
> > + * (C) Copyright 2022 Toshiba Electronic Devices & Storage
> > +Corporation */
> > +
> > +#ifndef _UAPI_LINUX_DNN_H
> > +#define _UAPI_LINUX_DNN_H
> > +
> > +#include <linux/ioctl.h>
> > +#include <linux/types.h>
> > +#include "ipa.h"
> > +
> > +#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U)
> > +#define DRV_DNN_BUFFER_INDEX_MAX (15)
> > +
> > +#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base
> address */
> > +
> > +#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U)
> > +#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U)
> > +#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U)
> > +#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U)
> > +
> > +/**
> > + * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS
> > + *
> > + * @state: State of driver
> > + * @eer_cmd: Execution error command
> > + * @eer: Execution error
> > + * @reserved: Padding
> > + * @eer_flags: Execution error flags
> > + */
> > +struct drv_dnn_status {
> > + enum drv_ipa_state state;
> > + __u32 eer_cmd;
> > + __u32 eer : 1;
> > + __u32 reserved : 31;
>
> bitfields will not work like this for uapi files, sorry.

I'll change the type of the member eer from bitfield to bool.

>
> > + __u32 eer_flags[32];
>
> What endian is all of these? Big? Little? Unknown?

The processors and accelerators are little endian in Visconti SoC.
Do I have to use more specific type such as __le32 ?

>
> > +};
> > +
> > +struct drv_dnn_base_addr {
> > + __u32 purpose;
> > + union {
> > + struct drv_ipa_addr ipa_addr;
> > + uintptr_t list_addr;
>
> You really do not ever want a uintptr_t in a uapi file, that's not going to be
> portable at all. It's also not a valid kernel type :(

I understand. The member list_addr should be typed "struct drv_ipa_addr*".

> No documentation on what "purpose" is for?

I'll add description for struct drv_dnn_base_addr with kernel-doc style.

>
> > + } addr;
> > +};
> > +
> > +/**
> > + * struct drv_dnn_descriptor - DNN IPA Descriptor for IOC_IPA_START
> > + *
> > + * @configuration: Address of configuration data
> > + * @configuration_offset: Configuration offset
> > + * @configuration_size: Configuration data size
> > + * @list_num: Number of input/output list
> > + * @base_addr: Base addresses
> > + * @base_addr_flag: Bit-fields of base_addr list config;
> > + * 0 for fixed address,
> > + * 1 for address list.
>
> Where are the bitfield definitions?
> What about unused bits, what happens with them? You are checking them,
> right?
>

I'll update comments for "base_addr" and "base_addr_flag".
The member base_addr[n] are handled differently
according to the n'th bit of the member base_addr_flag;
where 0 <= n < HWD_DNN_BASE_ADDR_NUM-1.

> > + * @config_done: Flags of called configuration
> > + * @buffer_info: Table of buffer information
> > + * @buffer_info_num: Number of buffer_info
> > + */
> > +struct drv_dnn_descriptor {
> > + struct drv_ipa_addr configuration;
> > + __u32 configuration_offset;
>
> What endian are any of these?

They are little endian as processors and accelerators are LE.
Do I have to use specific type such as __le32?

Do we need special care for endianness when userland and kernel are sharing data (a drv_dnn_descriptor instance) ?
I thought there're no endianness problem when the driver is reading/writing HW's 32bit registers.
I understand, generally, special cares are needed for some cases like:
* network byte order --- endianness is specified in a specification.
* data blocks are stored to byte-addressable memory and read by another processing elements (HW or CPU in another system).
* HW designed for little endian is placed in big endian system.

> thanks,
>
> greg k-h

Regards,
Yuji

2022-07-26 06:31:18

by Yuji Ishikawa

[permalink] [raw]
Subject: RE: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

Hi Greg

Thanks for your comments

> -----Original Message-----
> From: Greg KH <[email protected]>
> Sent: Monday, July 25, 2022 9:48 PM
> To: ishikawa yuji(石川 悠司 ○RDC□AITC○EA開)
> <[email protected]>Hi
> Cc: Rob Herring <[email protected]>; Hans Verkuil <[email protected]>;
> iwamatsu nobuhiro(岩松 信洋 □SWC◯ACT)
> <[email protected]>; Jonathan Corbet <[email protected]>;
> Sumit Semwal <[email protected]>; Christian König
> <[email protected]>; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image
> processing accelerator
>
> On Fri, Jul 22, 2022 at 05:28:56PM +0900, Yuji Ishikawa wrote:
> > Add support to DNN image processing accelerator on Toshiba Visconti ARM
> SoCs.
> > The accelerator is applicable to DNN inference tasks.
> >
> > Signed-off-by: Yuji Ishikawa <[email protected]>
> > Reviewed-by: Nobuhiro Iwamatsu <[email protected]>
> > ---
> > v1 -> v2:
> > - applied checkpatch.pl --strict
> > - removed unused code
> > ---
> > drivers/soc/visconti/Kconfig | 6 +
> > drivers/soc/visconti/Makefile | 2 +
> > drivers/soc/visconti/dnn/Makefile | 6 +
> > drivers/soc/visconti/dnn/dnn.c | 523
> +++++++++++++++++++++++++
> > drivers/soc/visconti/dnn/hwd_dnn.c | 183 +++++++++
> > drivers/soc/visconti/dnn/hwd_dnn.h | 68 ++++
> > drivers/soc/visconti/dnn/hwd_dnn_reg.h | 228 +++++++++++
> > drivers/soc/visconti/uapi/dnn.h | 77 ++++
> > 8 files changed, 1093 insertions(+)
> > create mode 100644 drivers/soc/visconti/dnn/Makefile create mode
> > 100644 drivers/soc/visconti/dnn/dnn.c create mode 100644
> > drivers/soc/visconti/dnn/hwd_dnn.c
> > create mode 100644 drivers/soc/visconti/dnn/hwd_dnn.h
> > create mode 100644 drivers/soc/visconti/dnn/hwd_dnn_reg.h
> > create mode 100644 drivers/soc/visconti/uapi/dnn.h
> >
> > diff --git a/drivers/soc/visconti/Kconfig
> > b/drivers/soc/visconti/Kconfig index 8b1378917..a25287d0c 100644
> > --- a/drivers/soc/visconti/Kconfig
> > +++ b/drivers/soc/visconti/Kconfig
> > @@ -1 +1,7 @@
> > +if ARCH_VISCONTI
> > +
> > +config VISCONTI_DNN
> > + bool "Visconti DNN driver"
> > +
>
> We can't take Kconfig options with no help text at all, please provide the
> needed information here.

I'll add detailed description to Kconfig help text.

>
> And why can't this be a module?

There're no special reasons to disable module build.
I'll test module build and add the option.

Regards,
Yuji

2022-07-26 15:17:33

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image processing accelerator

On Tue, Jul 26, 2022 at 06:10:37AM +0000, [email protected] wrote:
> Hi Greg
>
> Thank you for your comments.
>
> > -----Original Message-----
> > From: Greg KH <[email protected]>
> > Sent: Monday, July 25, 2022 9:51 PM
> > To: ishikawa yuji(石川 悠司 ○RDC□AITC○EA開)
> > <[email protected]>
> > Cc: Rob Herring <[email protected]>; Hans Verkuil <[email protected]>;
> > iwamatsu nobuhiro(岩松 信洋 □SWC◯ACT)
> > <[email protected]>; Jonathan Corbet <[email protected]>;
> > Sumit Semwal <[email protected]>; Christian König
> > <[email protected]>; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]
> > Subject: Re: [PATCH v2 3/5] soc: visconti: Add Toshiba Visconti DNN image
> > processing accelerator
> >
> > On Fri, Jul 22, 2022 at 05:28:56PM +0900, Yuji Ishikawa wrote:
> > > --- /dev/null
> > > +++ b/drivers/soc/visconti/uapi/dnn.h
> > > @@ -0,0 +1,77 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > > +/* Toshiba Visconti DNN Accelerator Support
> > > + *
> > > + * (C) Copyright 2022 TOSHIBA CORPORATION
> > > + * (C) Copyright 2022 Toshiba Electronic Devices & Storage
> > > +Corporation */
> > > +
> > > +#ifndef _UAPI_LINUX_DNN_H
> > > +#define _UAPI_LINUX_DNN_H
> > > +
> > > +#include <linux/ioctl.h>
> > > +#include <linux/types.h>
> > > +#include "ipa.h"
> > > +
> > > +#define DRV_DNN_BIT_CONFIG_DESC_FINAL (0x8000U)
> > > +#define DRV_DNN_BUFFER_INDEX_MAX (15)
> > > +
> > > +#define DRV_DNN_BASE_ADDR_NUM (8U) /* DNN number of base
> > address */
> > > +
> > > +#define DRV_DNN_BASE_ADDR_PURPOSE_INPUT (1U)
> > > +#define DRV_DNN_BASE_ADDR_PURPOSE_OUTPUT (2U)
> > > +#define DRV_DNN_BASE_ADDR_PURPOSE_AWB (3U)
> > > +#define DRV_DNN_BASE_ADDR_PURPOSE_TEMPORARY (4U)
> > > +
> > > +/**
> > > + * struct drv_dnn_status - DNN IPA status for IOC_IPA_GET_STATUS
> > > + *
> > > + * @state: State of driver
> > > + * @eer_cmd: Execution error command
> > > + * @eer: Execution error
> > > + * @reserved: Padding
> > > + * @eer_flags: Execution error flags
> > > + */
> > > +struct drv_dnn_status {
> > > + enum drv_ipa_state state;
> > > + __u32 eer_cmd;
> > > + __u32 eer : 1;
> > > + __u32 reserved : 31;
> >
> > bitfields will not work like this for uapi files, sorry.
>
> I'll change the type of the member eer from bitfield to bool.

bool will not work for a user/kernel api structure at all, sorry.

> > > + __u32 eer_flags[32];
> >
> > What endian is all of these? Big? Little? Unknown?
>
> The processors and accelerators are little endian in Visconti SoC.
> Do I have to use more specific type such as __le32 ?

Of course, this has to be defined as to how the hardware sees it. Why
wouldn't you specify this?

> > > +};
> > > +
> > > +struct drv_dnn_base_addr {
> > > + __u32 purpose;
> > > + union {
> > > + struct drv_ipa_addr ipa_addr;
> > > + uintptr_t list_addr;
> >
> > You really do not ever want a uintptr_t in a uapi file, that's not going to be
> > portable at all. It's also not a valid kernel type :(
>
> I understand. The member list_addr should be typed "struct drv_ipa_addr*".

No, not at all, that too will not work and is not portable. Please read
the documentation in the kernel for how to write correct user/kernel
apis with ioctl structures. It is all documented there, please do not
ignore it and create an api that will be broken.

> > > + * @config_done: Flags of called configuration
> > > + * @buffer_info: Table of buffer information
> > > + * @buffer_info_num: Number of buffer_info
> > > + */
> > > +struct drv_dnn_descriptor {
> > > + struct drv_ipa_addr configuration;
> > > + __u32 configuration_offset;
> >
> > What endian are any of these?
>
> They are little endian as processors and accelerators are LE.
> Do I have to use specific type such as __le32?

Yes, as that is defined by your hardware, not the processor the kernel
is running as.

> Do we need special care for endianness when userland and kernel are sharing data (a drv_dnn_descriptor instance) ?

Yes, why wouldn't you?

> I thought there're no endianness problem when the driver is reading/writing HW's 32bit registers.

Is that what you are doing here? It's impossible to tell.

For data that only crosses the user/kernel boundry, you can use the
native processor endian, but when it crosses the kernel/hardware
boundry, you HAVE to specify it as to what the hardware expects.

thanks,

greg k-h