The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
between the SOC (acting as a BMC) and a host processor in a server.
This series adds a driver to control the XDMA engine in order to easily
perform DMA operations to and from the host processor.
Eddie James (6):
dt-bindings: misc: Add Aspeed XDMA engine binding documentation
drivers/misc: Add Aspeed XDMA engine driver
drivers/misc: xdma: Add user interface
drivers/misc: xdma: Add PCI device configuration sysfs
drivers/misc: xdma: Add debugfs entries
ARM: dts: aspeed: Add XDMA engine
.../devicetree/bindings/misc/aspeed,xdma.txt | 23 +
MAINTAINERS | 9 +
arch/arm/boot/dts/aspeed-g5.dtsi | 8 +
drivers/misc/Kconfig | 8 +
drivers/misc/Makefile | 1 +
drivers/misc/aspeed-xdma.c | 924 +++++++++++++++++++++
include/uapi/linux/aspeed-xdma.h | 26 +
7 files changed, 999 insertions(+)
create mode 100644 Documentation/devicetree/bindings/misc/aspeed,xdma.txt
create mode 100644 drivers/misc/aspeed-xdma.c
create mode 100644 include/uapi/linux/aspeed-xdma.h
--
1.8.3.1
Add debugfs entries for the relevant XDMA engine registers and for
dumping the AST2500 reserved memory space.
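For reference, the entries can be read from userspace roughly as in the sketch
below (the /sys/kernel/debug mount point is an assumption; the directory name
comes from DEVICE_NAME in the driver, and the helper itself is hypothetical):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical example: dump the XDMA register view exposed by debugfs. */
static void dump_xdma_regs(void)
{
	char buf[256];
	ssize_t n;
	int fd = open("/sys/kernel/debug/aspeed-xdma/regs", O_RDONLY);

	if (fd < 0)
		return;

	/* The regset file prints one "name = 0xvalue" line per register. */
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}

	close(fd);
}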
Signed-off-by: Eddie James <[email protected]>
---
drivers/misc/aspeed-xdma.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 96 insertions(+)
diff --git a/drivers/misc/aspeed-xdma.c b/drivers/misc/aspeed-xdma.c
index 0a1a093..a645a5f 100644
--- a/drivers/misc/aspeed-xdma.c
+++ b/drivers/misc/aspeed-xdma.c
@@ -142,6 +142,12 @@ struct aspeed_xdma {
struct list_head vga_blks_free;
struct miscdevice misc;
+ struct dentry *debugfs_dir;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct debugfs_regset32 regset;
+ struct debugfs_reg32 regs[XDMA_NUM_DEBUGFS_REGS];
+#endif /* IS_ENABLED(CONFIG_DEBUG_FS) */
};
struct aspeed_xdma_client {
@@ -634,6 +640,92 @@ static int aspeed_xdma_init_mem(struct aspeed_xdma *ctx)
return 0;
}
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static ssize_t aspeed_xdma_debugfs_vga_read(struct file *file,
+ char __user *buf, size_t len,
+ loff_t *offset)
+{
+ int rc;
+ struct inode *inode = file_inode(file);
+ struct aspeed_xdma *ctx = inode->i_private;
+ void __iomem *vga = ioremap(ctx->vga_phys, ctx->vga_size);
+ loff_t offs = *offset;
+ void *tmp;
+
+ if (!vga)
+ return -ENOMEM;
+
+ if (len + offs > ctx->vga_size) {
+ iounmap(vga);
+ return -EINVAL;
+ }
+
+ tmp = kzalloc(len, GFP_KERNEL);
+ if (!tmp) {
+ iounmap(vga);
+ return -ENOMEM;
+ }
+
+ memcpy_fromio(tmp, vga + offs, len);
+
+ rc = copy_to_user(buf, tmp, len);
+ if (rc) {
+ iounmap(vga);
+ kfree(tmp);
+ return -EFAULT;
+ }
+
+ *offset = offs + len;
+
+ kfree(tmp);
+ iounmap(vga);
+
+ return len;
+}
+
+static const struct file_operations aspeed_xdma_debugfs_vga_fops = {
+ .owner = THIS_MODULE,
+ .llseek = generic_file_llseek,
+ .read = aspeed_xdma_debugfs_vga_read,
+};
+
+static void aspeed_xdma_init_debugfs(struct aspeed_xdma *ctx)
+{
+ ctx->debugfs_dir = debugfs_create_dir(DEVICE_NAME, NULL);
+ if (IS_ERR_OR_NULL(ctx->debugfs_dir)) {
+ dev_warn(ctx->dev, "Failed to create debugfs directory.\n");
+ return;
+ }
+
+ debugfs_create_file("vga", 0444, ctx->debugfs_dir, ctx,
+ &aspeed_xdma_debugfs_vga_fops);
+
+ ctx->regs[0].name = "addr";
+ ctx->regs[0].offset = XDMA_BMC_CMD_QUEUE_ADDR;
+ ctx->regs[1].name = "endp";
+ ctx->regs[1].offset = XDMA_BMC_CMD_QUEUE_ENDP;
+ ctx->regs[2].name = "writep";
+ ctx->regs[2].offset = XDMA_BMC_CMD_QUEUE_WRITEP;
+ ctx->regs[3].name = "readp";
+ ctx->regs[3].offset = XDMA_BMC_CMD_QUEUE_READP;
+ ctx->regs[4].name = "control";
+ ctx->regs[4].offset = XDMA_CTRL;
+ ctx->regs[5].name = "status";
+ ctx->regs[5].offset = XDMA_STATUS;
+
+ ctx->regset.regs = ctx->regs;
+ ctx->regset.nregs = XDMA_NUM_DEBUGFS_REGS;
+ ctx->regset.base = ctx->base;
+
+ debugfs_create_regset32("regs", 0444, ctx->debugfs_dir, &ctx->regset);
+}
+#else
+static void aspeed_xdma_init_debugfs(struct aspeed_xdma *ctx)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_DEBUG_FS) */
+
static void aspeed_xdma_free_vga_blks(struct aspeed_xdma *ctx)
{
struct aspeed_xdma_vga_blk *free;
@@ -786,6 +878,8 @@ static int aspeed_xdma_probe(struct platform_device *pdev)
device_create_file(dev, &dev_attr_use_bmc);
device_create_file(dev, &dev_attr_use_vga);
+ aspeed_xdma_init_debugfs(ctx);
+
return 0;
}
@@ -793,6 +887,8 @@ static int aspeed_xdma_remove(struct platform_device *pdev)
{
struct aspeed_xdma *ctx = platform_get_drvdata(pdev);
+ debugfs_remove_recursive(ctx->debugfs_dir);
+
device_remove_file(ctx->dev, &dev_attr_use_vga);
device_remove_file(ctx->dev, &dev_attr_use_bmc);
--
1.8.3.1
Add a node for the XDMA engine with all the necessary information.
Signed-off-by: Eddie James <[email protected]>
---
arch/arm/boot/dts/aspeed-g5.dtsi | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi
index 85ed9db..a0856c7 100644
--- a/arch/arm/boot/dts/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed-g5.dtsi
@@ -234,6 +234,14 @@
reg-io-width = <4>;
};
+ xdma: xdma@1e6e7000 {
+ compatible = "aspeed,ast2500-xdma";
+ reg = <0x1e6e7000 0x100>;
+ resets = <&syscon ASPEED_RESET_XDMA>;
+ interrupts = <6>;
+ status = "disabled";
+ };
+
adc: adc@1e6e9000 {
compatible = "aspeed,ast2500-adc";
reg = <0x1e6e9000 0xb0>;
--
1.8.3.1
The AST2500 has two embedded PCI devices. The XDMA engine can use either
device to perform DMA transfers, so users need the ability to choose
which device to use. This commit therefore adds two sysfs files that
toggle the AST2500 and the XDMA engine between the two PCI devices.
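For reference, switching devices from userspace might look roughly like the
sketch below (the sysfs path is an assumption based on the usual platform
device naming, and the helper is hypothetical):

#include <fcntl.h>
#include <unistd.h>

/* Hypothetical example: request that the XDMA engine use the BMC PCI device. */
static int xdma_select_bmc_device(void)
{
	int fd = open("/sys/bus/platform/devices/1e6e7000.xdma/use_bmc",
		      O_WRONLY);
	int rc;

	if (fd < 0)
		return -1;

	/* Any write triggers the switch; the written data itself is ignored. */
	rc = write(fd, "1", 1) == 1 ? 0 : -1;
	close(fd);
	return rc;
}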
Signed-off-by: Eddie James <[email protected]>
---
drivers/misc/aspeed-xdma.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/drivers/misc/aspeed-xdma.c b/drivers/misc/aspeed-xdma.c
index 16235b3..0a1a093 100644
--- a/drivers/misc/aspeed-xdma.c
+++ b/drivers/misc/aspeed-xdma.c
@@ -645,6 +645,66 @@ static void aspeed_xdma_free_vga_blks(struct aspeed_xdma *ctx)
}
}
+static int aspeed_xdma_change_pcie_conf(struct aspeed_xdma *ctx, u32 val)
+{
+ int rc;
+
+ mutex_lock(&ctx->start_lock);
+ rc = wait_event_interruptible_timeout(ctx->wait,
+ !test_bit(XDMA_IN_PRG,
+ &ctx->flags),
+ msecs_to_jiffies(1000));
+ if (rc < 0) {
+ mutex_unlock(&ctx->start_lock);
+ return -EINTR;
+ }
+
+ /* previous op didn't complete, wake up waiters anyway */
+ if (!rc)
+ wake_up_interruptible_all(&ctx->wait);
+
+ reset_control_assert(ctx->reset);
+ msleep(10);
+
+ regmap_update_bits(ctx->scu, SCU_PCIE_CONF,
+ SCU_PCIE_CONF_VGA_EN | SCU_PCIE_CONF_BMC_EN,
+ val);
+ msleep(10);
+
+ reset_control_deassert(ctx->reset);
+ msleep(10);
+
+ aspeed_xdma_init_eng(ctx);
+
+ mutex_unlock(&ctx->start_lock);
+
+ return 0;
+}
+
+static ssize_t aspeed_xdma_use_bmc(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc;
+ struct aspeed_xdma *ctx = dev_get_drvdata(dev);
+
+ rc = aspeed_xdma_change_pcie_conf(ctx, SCU_PCIE_CONF_BMC_EN);
+ return rc ?: count;
+}
+static DEVICE_ATTR(use_bmc, 0200, NULL, aspeed_xdma_use_bmc);
+
+static ssize_t aspeed_xdma_use_vga(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc;
+ struct aspeed_xdma *ctx = dev_get_drvdata(dev);
+
+ rc = aspeed_xdma_change_pcie_conf(ctx, SCU_PCIE_CONF_VGA_EN);
+ return rc ?: count;
+}
+static DEVICE_ATTR(use_vga, 0200, NULL, aspeed_xdma_use_vga);
+
static int aspeed_xdma_probe(struct platform_device *pdev)
{
int irq;
@@ -723,6 +783,9 @@ static int aspeed_xdma_probe(struct platform_device *pdev)
return rc;
}
+ device_create_file(dev, &dev_attr_use_bmc);
+ device_create_file(dev, &dev_attr_use_vga);
+
return 0;
}
@@ -730,6 +793,9 @@ static int aspeed_xdma_remove(struct platform_device *pdev)
{
struct aspeed_xdma *ctx = platform_get_drvdata(pdev);
+ device_remove_file(ctx->dev, &dev_attr_use_vga);
+ device_remove_file(ctx->dev, &dev_attr_use_bmc);
+
misc_deregister(&ctx->misc);
aspeed_xdma_free_vga_blks(ctx);
--
1.8.3.1
This commit adds a miscdevice to provide a user interface to the XDMA
engine. The interface provides a write operation to start DMA
operations; the DMA parameters are passed as the data to the write call.
The actual data to transfer is NOT passed through write. Note that both
directions of DMA operation are accomplished through the write command:
BMC to host and host to BMC.
The XDMA engine is restricted to only accessing the reserved memory
space on the AST2500, typically used by the VGA. For this reason, this
commit also adds a simple memory manager for this reserved memory space
which can then be allocated in pages by users calling mmap. The space
allocated by a client will be the space used in the DMA operation. For
an "upstream" (BMC to host) operation, the data in the client's area
will be transferred to the host. For a "downstream" (host to BMC)
operation, the host data will be placed in the client's memory area.
Poll is also provided in order to determine when the DMA operation is
complete for non-blocking IO.
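For reference, a minimal sketch of the intended user space flow is shown below
(the /dev/xdma name follows the miscdevice registered by this patch; error
handling is abbreviated and the helper itself is hypothetical):

#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/aspeed-xdma.h>

/* Hypothetical example: send 'len' bytes (a multiple of 16) to the host. */
static int xdma_upstream_example(uint64_t host_addr, const void *data,
				 uint32_t len)
{
	struct aspeed_xdma_op op;
	struct pollfd pfd;
	void *buf;
	int ret = -1;
	int fd;

	fd = open("/dev/xdma", O_RDWR | O_NONBLOCK);
	if (fd < 0)
		return -1;

	/* Allocate a block of the reserved memory area for this client. */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		close(fd);
		return -1;
	}

	/* Stage the data, then describe the operation through write(). */
	memcpy(buf, data, len);
	op.upstream = 1;
	op.host_addr = host_addr;
	op.len = len;

	if (write(fd, &op, sizeof(op)) == (ssize_t)sizeof(op)) {
		/* Non-blocking IO: poll for completion of this client's op. */
		pfd.fd = fd;
		pfd.events = POLLIN;
		poll(&pfd, 1, -1);
		ret = 0;
	}

	munmap(buf, len);
	close(fd);
	return ret;
}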
Signed-off-by: Eddie James <[email protected]>
---
drivers/misc/aspeed-xdma.c | 301 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 301 insertions(+)
diff --git a/drivers/misc/aspeed-xdma.c b/drivers/misc/aspeed-xdma.c
index 970ea92..16235b3 100644
--- a/drivers/misc/aspeed-xdma.c
+++ b/drivers/misc/aspeed-xdma.c
@@ -113,6 +113,12 @@ struct aspeed_xdma_cmd {
u32 resv1;
};
+struct aspeed_xdma_vga_blk {
+ u32 phys;
+ u32 size;
+ struct list_head list;
+};
+
struct aspeed_xdma_client;
struct aspeed_xdma {
@@ -123,6 +129,8 @@ struct aspeed_xdma {
unsigned long flags;
unsigned int cmd_idx;
+ struct mutex list_lock;
+ struct mutex start_lock;
wait_queue_head_t wait;
struct aspeed_xdma_client *current_client;
@@ -131,6 +139,9 @@ struct aspeed_xdma {
dma_addr_t vga_dma;
void *cmdq;
void *vga_virt;
+ struct list_head vga_blks_free;
+
+ struct miscdevice misc;
};
struct aspeed_xdma_client {
@@ -298,6 +309,260 @@ static irqreturn_t aspeed_xdma_irq(int irq, void *arg)
return IRQ_HANDLED;
}
+static u32 aspeed_xdma_alloc_vga_blk(struct aspeed_xdma *ctx, u32 req_size)
+{
+ u32 phys = 0;
+ u32 size = PAGE_ALIGN(req_size);
+ struct aspeed_xdma_vga_blk *free;
+
+ mutex_lock(&ctx->list_lock);
+
+ list_for_each_entry(free, &ctx->vga_blks_free, list) {
+ if (free->size >= size) {
+ phys = free->phys;
+
+ if (size == free->size) {
+ dev_dbg(ctx->dev,
+ "Allocd %08x[%08x r(%08x)], del.\n",
+ phys, size, req_size);
+ list_del(&free->list);
+ kfree(free);
+ } else {
+ free->phys += size;
+ free->size -= size;
+ dev_dbg(ctx->dev, "Allocd %08x[%08x r(%08x)], "
+ "shrunk %08x[%08x].\n", phys, size,
+ req_size, free->phys, free->size);
+ }
+
+ break;
+ }
+ }
+
+ mutex_unlock(&ctx->list_lock);
+
+ return phys;
+}
+
+static void aspeed_xdma_free_vga_blk(struct aspeed_xdma *ctx, u32 phys,
+ u32 req_size)
+{
+ u32 min_free = UINT_MAX;
+ u32 size = PAGE_ALIGN(req_size);
+ const u32 end = phys + size;
+ struct aspeed_xdma_vga_blk *free;
+
+ mutex_lock(&ctx->list_lock);
+
+ list_for_each_entry(free, &ctx->vga_blks_free, list) {
+ if (end == free->phys) {
+ u32 fend = free->phys + free->size;
+
+ dev_dbg(ctx->dev,
+ "Freed %08x[%08x r(%08x)], exp %08x[%08x].\n",
+ phys, size, req_size, free->phys, free->size);
+
+ free->phys = phys;
+ free->size = fend - free->phys;
+
+ mutex_unlock(&ctx->list_lock);
+ return;
+ }
+
+ if (free->phys < min_free)
+ min_free = free->phys;
+ }
+
+ free = kzalloc(sizeof(*free), GFP_KERNEL);
+ if (free) {
+ free->phys = phys;
+ free->size = size;
+
+ dev_dbg(ctx->dev, "Freed %08x[%08x r(%08x)], new.\n", phys,
+ size, req_size);
+
+ if (phys < min_free)
+ list_add(&free->list, &ctx->vga_blks_free);
+ else
+ list_add_tail(&free->list, &ctx->vga_blks_free);
+ } else {
+ dev_err(ctx->dev, "Failed to register freed block.\n");
+ }
+
+ mutex_unlock(&ctx->list_lock);
+}
+
+static ssize_t aspeed_xdma_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *offset)
+{
+ int rc;
+ struct aspeed_xdma_op op;
+ struct aspeed_xdma_client *client = file->private_data;
+ struct aspeed_xdma *ctx = client->ctx;
+ u32 offs = client->phys ? (client->phys - ctx->vga_phys) :
+ XDMA_CMDQ_SIZE;
+
+ if (len != sizeof(struct aspeed_xdma_op))
+ return -EINVAL;
+
+ rc = copy_from_user(&op, buf, len);
+ if (rc)
+ return -EFAULT;
+
+ if (op.len > (ctx->vga_size - offs) || op.len < XDMA_BYTE_ALIGN)
+ return -EINVAL;
+
+ if (file->f_flags & O_NONBLOCK) {
+ if (!mutex_trylock(&ctx->start_lock))
+ return -EAGAIN;
+
+ if (test_bit(XDMA_IN_PRG, &ctx->flags)) {
+ mutex_unlock(&ctx->start_lock);
+ return -EAGAIN;
+ }
+ } else {
+ mutex_lock(&ctx->start_lock);
+
+ rc = wait_event_interruptible(ctx->wait,
+ !test_bit(XDMA_IN_PRG,
+ &ctx->flags));
+ if (rc) {
+ mutex_unlock(&ctx->start_lock);
+ return -EINTR;
+ }
+ }
+
+ ctx->current_client = client;
+ set_bit(XDMA_IN_PRG, &client->flags);
+
+ aspeed_xdma_start(ctx, &op, ctx->vga_phys + offs);
+
+ mutex_unlock(&ctx->start_lock);
+
+ if (!(file->f_flags & O_NONBLOCK)) {
+ rc = wait_event_interruptible(ctx->wait,
+ !test_bit(XDMA_IN_PRG,
+ &ctx->flags));
+ if (rc)
+ return -EINTR;
+ }
+
+ return len;
+}
+
+static __poll_t aspeed_xdma_poll(struct file *file,
+ struct poll_table_struct *wait)
+{
+ __poll_t mask = 0;
+ __poll_t req = poll_requested_events(wait);
+ struct aspeed_xdma_client *client = file->private_data;
+ struct aspeed_xdma *ctx = client->ctx;
+
+ if (req & (EPOLLIN | EPOLLRDNORM)) {
+ if (test_bit(XDMA_IN_PRG, &client->flags))
+ poll_wait(file, &ctx->wait, wait);
+
+ if (!test_bit(XDMA_IN_PRG, &client->flags))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ }
+
+ if (req & (EPOLLOUT | EPOLLWRNORM)) {
+ if (test_bit(XDMA_IN_PRG, &ctx->flags))
+ poll_wait(file, &ctx->wait, wait);
+
+ if (!test_bit(XDMA_IN_PRG, &ctx->flags))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ }
+
+ return mask;
+}
+
+static void aspeed_xdma_vma_close(struct vm_area_struct *vma)
+{
+ struct aspeed_xdma_client *client = vma->vm_private_data;
+
+ aspeed_xdma_free_vga_blk(client->ctx, client->phys, client->size);
+
+ client->phys = 0;
+ client->size = 0;
+}
+
+static const struct vm_operations_struct aspeed_xdma_vm_ops = {
+ .close = aspeed_xdma_vma_close,
+};
+
+static int aspeed_xdma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int rc;
+ struct aspeed_xdma_client *client = file->private_data;
+ struct aspeed_xdma *ctx = client->ctx;
+
+ /* restrict file to one mapping */
+ if (client->size)
+ return -ENOMEM;
+
+ client->size = vma->vm_end - vma->vm_start;
+ client->phys = aspeed_xdma_alloc_vga_blk(ctx, client->size);
+ if (!client->phys) {
+ client->size = 0;
+ return -ENOMEM;
+ }
+
+ vma->vm_pgoff = (client->phys - ctx->vga_phys) >> PAGE_SHIFT;
+ vma->vm_ops = &aspeed_xdma_vm_ops;
+ vma->vm_private_data = client;
+
+ rc = dma_mmap_coherent(ctx->dev, vma, ctx->vga_virt, ctx->vga_dma,
+ ctx->vga_size);
+ if (rc) {
+ aspeed_xdma_free_vga_blk(ctx, client->phys, client->size);
+
+ client->phys = 0;
+ client->size = 0;
+ return rc;
+ }
+
+ dev_dbg(ctx->dev, "mmap: v[%08lx] to p[%08x], s[%08x]\n",
+ vma->vm_start, client->phys, client->size);
+
+ return 0;
+}
+
+static int aspeed_xdma_open(struct inode *inode, struct file *file)
+{
+ struct miscdevice *misc = file->private_data;
+ struct aspeed_xdma *ctx = container_of(misc, struct aspeed_xdma, misc);
+ struct aspeed_xdma_client *client = kzalloc(sizeof(*client),
+ GFP_KERNEL);
+
+ if (!client)
+ return -ENOMEM;
+
+ client->ctx = ctx;
+ file->private_data = client;
+ return 0;
+}
+
+static int aspeed_xdma_release(struct inode *inode, struct file *file)
+{
+ struct aspeed_xdma_client *client = file->private_data;
+
+ if (client->ctx->current_client == client)
+ client->ctx->current_client = NULL;
+
+ kfree(client);
+ return 0;
+}
+
+static const struct file_operations aspeed_xdma_fops = {
+ .owner = THIS_MODULE,
+ .write = aspeed_xdma_write,
+ .poll = aspeed_xdma_poll,
+ .mmap = aspeed_xdma_mmap,
+ .open = aspeed_xdma_open,
+ .release = aspeed_xdma_release,
+};
+
static int aspeed_xdma_init_mem(struct aspeed_xdma *ctx)
{
int rc;
@@ -360,12 +625,26 @@ static int aspeed_xdma_init_mem(struct aspeed_xdma *ctx)
return -ENOMEM;
}
+ aspeed_xdma_free_vga_blk(ctx, ctx->vga_phys, ctx->vga_size);
+ aspeed_xdma_alloc_vga_blk(ctx, XDMA_CMDQ_SIZE);
+
dev_dbg(ctx->dev, "VGA mapped at phys[%08x], size[%08x].\n",
ctx->vga_phys, ctx->vga_size);
return 0;
}
+static void aspeed_xdma_free_vga_blks(struct aspeed_xdma *ctx)
+{
+ struct aspeed_xdma_vga_blk *free;
+ struct aspeed_xdma_vga_blk *tmp;
+
+ list_for_each_entry_safe(free, tmp, &ctx->vga_blks_free, list) {
+ list_del(&free->list);
+ kfree(free);
+ }
+}
+
static int aspeed_xdma_probe(struct platform_device *pdev)
{
int irq;
@@ -380,6 +659,9 @@ static int aspeed_xdma_probe(struct platform_device *pdev)
ctx->dev = dev;
platform_set_drvdata(pdev, ctx);
init_waitqueue_head(&ctx->wait);
+ mutex_init(&ctx->list_lock);
+ mutex_init(&ctx->start_lock);
+ INIT_LIST_HEAD(&ctx->vga_blks_free);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ctx->base = devm_ioremap_resource(dev, res);
@@ -425,6 +707,22 @@ static int aspeed_xdma_probe(struct platform_device *pdev)
aspeed_xdma_init_eng(ctx);
+ ctx->misc.minor = MISC_DYNAMIC_MINOR;
+ ctx->misc.fops = &aspeed_xdma_fops;
+ ctx->misc.name = "xdma";
+ ctx->misc.parent = dev;
+ rc = misc_register(&ctx->misc);
+ if (rc) {
+ dev_err(dev, "Unable to register xdma miscdevice\n");
+
+ aspeed_xdma_free_vga_blks(ctx);
+ dma_free_coherent(dev, ctx->vga_size, ctx->vga_virt,
+ ctx->vga_dma);
+ dma_release_declared_memory(dev);
+ reset_control_assert(ctx->reset);
+ return rc;
+ }
+
return 0;
}
@@ -432,6 +730,9 @@ static int aspeed_xdma_remove(struct platform_device *pdev)
{
struct aspeed_xdma *ctx = platform_get_drvdata(pdev);
+ misc_deregister(&ctx->misc);
+
+ aspeed_xdma_free_vga_blks(ctx);
dma_free_coherent(ctx->dev, ctx->vga_size, ctx->vga_virt,
ctx->vga_dma);
dma_release_declared_memory(ctx->dev);
--
1.8.3.1
The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
between the SOC (acting as a BMC) and a host processor in a server.
This commit adds a driver to control the XDMA engine and adds functions
to initialize the hardware and memory and start DMA operations.
Signed-off-by: Eddie James <[email protected]>
---
MAINTAINERS | 9 +
drivers/misc/Kconfig | 8 +
drivers/misc/Makefile | 1 +
drivers/misc/aspeed-xdma.c | 461 +++++++++++++++++++++++++++++++++++++++
include/uapi/linux/aspeed-xdma.h | 26 +++
5 files changed, 505 insertions(+)
create mode 100644 drivers/misc/aspeed-xdma.c
create mode 100644 include/uapi/linux/aspeed-xdma.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 1e64279..8428f84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2521,6 +2521,15 @@ S: Maintained
F: drivers/media/platform/aspeed-video.c
F: Documentation/devicetree/bindings/media/aspeed-video.txt
+ASPEED XDMA ENGINE DRIVER
+M: Eddie James <[email protected]>
+L: [email protected] (moderated for non-subscribers)
+L: [email protected]
+S: Maintained
+F: Documentation/devicetree/bindings/misc/aspeed,xdma.txt
+F: drivers/misc/aspeed-xdma.c
+F: include/uapi/linux/aspeed-xdma.h
+
ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
M: Corentin Chary <[email protected]>
L: [email protected]
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 42ab8ec..8316f80 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -512,6 +512,14 @@ config ASPEED_LPC_SNOOP
allows the BMC to listen on and save the data written by
the host to an arbitrary LPC I/O port.
+config ASPEED_XDMA
+ tristate "Aspeed XDMA Engine Driver"
+ depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON && HAS_DMA
+ help
+ Enable support for the Aspeed XDMA Engine found on the Aspeed AST2500
+ SOC. The XDMA engine can perform automatic PCI DMA operations between
+ the AST2500 (acting as a BMC) and a host processor.
+
config PCI_ENDPOINT_TEST
depends on PCI
select CRC32
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index d5b7d34..969a680 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o
obj-$(CONFIG_CXL_BASE) += cxl/
obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o
obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
+obj-$(CONFIG_ASPEED_XDMA) += aspeed-xdma.o
obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o
obj-$(CONFIG_OCXL) += ocxl/
obj-y += cardreader/
diff --git a/drivers/misc/aspeed-xdma.c b/drivers/misc/aspeed-xdma.c
new file mode 100644
index 0000000..970ea92
--- /dev/null
+++ b/drivers/misc/aspeed-xdma.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright IBM Corp 2019
+
+#include <linux/aspeed-xdma.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/mfd/syscon.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
+#define DEVICE_NAME "aspeed-xdma"
+
+#define SCU_STRAP 0x070
+#define SCU_STRAP_VGA_MEM GENMASK(3, 2)
+
+#define SCU_PCIE_CONF 0x180
+#define SCU_PCIE_CONF_VGA_EN BIT(0)
+#define SCU_PCIE_CONF_VGA_EN_MMIO BIT(1)
+#define SCU_PCIE_CONF_VGA_EN_LPC BIT(2)
+#define SCU_PCIE_CONF_VGA_EN_MSI BIT(3)
+#define SCU_PCIE_CONF_VGA_EN_MCTP BIT(4)
+#define SCU_PCIE_CONF_VGA_EN_IRQ BIT(5)
+#define SCU_PCIE_CONF_VGA_EN_DMA BIT(6)
+#define SCU_PCIE_CONF_BMC_EN BIT(8)
+#define SCU_PCIE_CONF_BMC_EN_MMIO BIT(9)
+#define SCU_PCIE_CONF_BMC_EN_MSI BIT(11)
+#define SCU_PCIE_CONF_BMC_EN_MCTP BIT(12)
+#define SCU_PCIE_CONF_BMC_EN_IRQ BIT(13)
+#define SCU_PCIE_CONF_BMC_EN_DMA BIT(14)
+#define SCU_PCIE_CONF_RSVD GENMASK(19, 18)
+
+#define SDMC_CONF 0x004
+#define SDMC_CONF_MEM GENMASK(1, 0)
+#define SDMC_REMAP 0x008
+#define SDMC_REMAP_MAGIC GENMASK(17, 16)
+
+#define XDMA_CMD_SIZE 4
+#define XDMA_CMDQ_SIZE PAGE_SIZE
+#define XDMA_BYTE_ALIGN 16
+#define XDMA_MAX_LINE_SIZE BIT(10)
+#define XDMA_NUM_CMDS \
+ (XDMA_CMDQ_SIZE / sizeof(struct aspeed_xdma_cmd))
+#define XDMA_NUM_DEBUGFS_REGS 6
+
+#define XDMA_CMD_BMC_CHECK BIT(0)
+#define XDMA_CMD_BMC_ADDR GENMASK(29, 4)
+#define XDMA_CMD_BMC_DIR_US BIT(31)
+
+#define XDMA_CMD_COMM1_HI_HOST_PITCH GENMASK(14, 3)
+#define XDMA_CMD_COMM1_HI_BMC_PITCH GENMASK(30, 19)
+
+#define XDMA_CMD_CONF_CHECK BIT(1)
+#define XDMA_CMD_CONF_LINE_SIZE GENMASK(14, 4)
+#define XDMA_CMD_CONF_IRQ_BMC BIT(15)
+#define XDMA_CMD_CONF_NUM_LINES GENMASK(27, 16)
+#define XDMA_CMD_CONF_IRQ BIT(31)
+
+#define XDMA_DS_PCIE_REQ_SIZE_128 0
+#define XDMA_DS_PCIE_REQ_SIZE_256 1
+#define XDMA_DS_PCIE_REQ_SIZE_512 2
+#define XDMA_DS_PCIE_REQ_SIZE_1K 3
+#define XDMA_DS_PCIE_REQ_SIZE_2K 4
+#define XDMA_DS_PCIE_REQ_SIZE_4K 5
+
+#define XDMA_BMC_CMD_QUEUE_ADDR 0x10
+#define XDMA_BMC_CMD_QUEUE_ENDP 0x14
+#define XDMA_BMC_CMD_QUEUE_WRITEP 0x18
+#define XDMA_BMC_CMD_QUEUE_READP 0x1c
+#define XDMA_BMC_CMD_QUEUE_READP_MAGIC 0xee882266
+#define XDMA_CTRL 0x20
+#define XDMA_CTRL_US_COMP BIT(4)
+#define XDMA_CTRL_DS_COMP BIT(5)
+#define XDMA_CTRL_DS_DIRTY BIT(6)
+#define XDMA_CTRL_DS_PCIE_REQ_SIZE GENMASK(19, 17)
+#define XDMA_CTRL_DS_DATA_TIMEOUT BIT(28)
+#define XDMA_CTRL_DS_CHECK_ID BIT(29)
+#define XDMA_STATUS 0x24
+#define XDMA_STATUS_US_COMP BIT(4)
+#define XDMA_STATUS_DS_COMP BIT(5)
+
+enum {
+ XDMA_IN_PRG,
+ XDMA_UPSTREAM,
+};
+
+struct aspeed_xdma_cmd {
+ u32 host_addr_lo;
+ u32 host_addr_hi;
+ u32 bmc_addr;
+ u32 comm1_hi;
+ u32 conf;
+ u32 id;
+ u32 resv0;
+ u32 resv1;
+};
+
+struct aspeed_xdma_client;
+
+struct aspeed_xdma {
+ struct device *dev;
+ void __iomem *base;
+ struct regmap *scu;
+ struct reset_control *reset;
+
+ unsigned long flags;
+ unsigned int cmd_idx;
+ wait_queue_head_t wait;
+ struct aspeed_xdma_client *current_client;
+
+ u32 vga_phys;
+ u32 vga_size;
+ dma_addr_t vga_dma;
+ void *cmdq;
+ void *vga_virt;
+};
+
+struct aspeed_xdma_client {
+ struct aspeed_xdma *ctx;
+
+ unsigned long flags;
+ u32 phys;
+ u32 size;
+};
+
+static u32 aspeed_xdma_reg_read(struct aspeed_xdma *ctx, u32 reg)
+{
+ u32 v = readl(ctx->base + reg);
+
+ dev_dbg(ctx->dev, "read %02x[%08x]\n", reg, v);
+ return v;
+}
+
+static void aspeed_xdma_reg_write(struct aspeed_xdma *ctx, u32 reg, u32 val)
+{
+ writel(val, ctx->base + reg);
+ dev_dbg(ctx->dev, "write %02x[%08x]\n", reg, readl(ctx->base + reg));
+}
+
+static void aspeed_xdma_init_eng(struct aspeed_xdma *ctx)
+{
+ const u32 ctrl = XDMA_CTRL_US_COMP | XDMA_CTRL_DS_COMP |
+ XDMA_CTRL_DS_DIRTY | FIELD_PREP(XDMA_CTRL_DS_PCIE_REQ_SIZE,
+ XDMA_DS_PCIE_REQ_SIZE_256) |
+ XDMA_CTRL_DS_DATA_TIMEOUT | XDMA_CTRL_DS_CHECK_ID;
+
+ aspeed_xdma_reg_write(ctx, XDMA_BMC_CMD_QUEUE_ENDP,
+ XDMA_CMD_SIZE * XDMA_NUM_CMDS);
+ aspeed_xdma_reg_write(ctx, XDMA_BMC_CMD_QUEUE_READP,
+ XDMA_BMC_CMD_QUEUE_READP_MAGIC);
+ aspeed_xdma_reg_write(ctx, XDMA_BMC_CMD_QUEUE_WRITEP, 0);
+ aspeed_xdma_reg_write(ctx, XDMA_CTRL, ctrl);
+
+ aspeed_xdma_reg_write(ctx, XDMA_BMC_CMD_QUEUE_ADDR, ctx->vga_phys);
+
+ ctx->cmd_idx = 0;
+ ctx->flags = 0;
+}
+
+static void aspeed_xdma_reset(struct aspeed_xdma *ctx)
+{
+ reset_control_assert(ctx->reset);
+
+ msleep(10);
+
+ reset_control_deassert(ctx->reset);
+
+ msleep(10);
+
+ aspeed_xdma_init_eng(ctx);
+}
+
+static void aspeed_xdma_start(struct aspeed_xdma *ctx,
+ struct aspeed_xdma_op *op, u32 bmc_addr)
+{
+ u32 conf = XDMA_CMD_CONF_CHECK | XDMA_CMD_CONF_IRQ_BMC |
+ XDMA_CMD_CONF_IRQ;
+ unsigned int line_size = op->len / XDMA_BYTE_ALIGN;
+ unsigned int num_lines = 1;
+ unsigned int nidx = (ctx->cmd_idx + 1) % XDMA_NUM_CMDS;
+ struct aspeed_xdma_cmd *cmd =
+ &(((struct aspeed_xdma_cmd *)ctx->cmdq)[ctx->cmd_idx]);
+
+ if (line_size > XDMA_MAX_LINE_SIZE) {
+ unsigned int rem;
+ unsigned int total;
+
+ num_lines = line_size / XDMA_MAX_LINE_SIZE;
+ total = XDMA_MAX_LINE_SIZE * num_lines;
+ rem = line_size - total;
+ line_size = XDMA_MAX_LINE_SIZE;
+
+ if (rem) {
+ unsigned int offs = total * XDMA_BYTE_ALIGN;
+ u32 r_bmc_addr = bmc_addr + offs;
+ u64 r_host_addr = op->host_addr + (u64)offs;
+ struct aspeed_xdma_cmd *r_cmd =
+ &(((struct aspeed_xdma_cmd *)ctx->cmdq)[nidx]);
+
+ r_cmd->host_addr_lo =
+ (u32)(r_host_addr & 0xFFFFFFFFULL);
+ r_cmd->host_addr_hi = (u32)(r_host_addr >> 32ULL);
+ r_cmd->bmc_addr = (r_bmc_addr & XDMA_CMD_BMC_ADDR) |
+ XDMA_CMD_BMC_CHECK |
+ (op->upstream ? XDMA_CMD_BMC_DIR_US : 0);
+ r_cmd->conf = conf |
+ FIELD_PREP(XDMA_CMD_CONF_LINE_SIZE, rem) |
+ FIELD_PREP(XDMA_CMD_CONF_NUM_LINES, 1);
+ r_cmd->comm1_hi =
+ FIELD_PREP(XDMA_CMD_COMM1_HI_HOST_PITCH, 1) |
+ FIELD_PREP(XDMA_CMD_COMM1_HI_BMC_PITCH, 1);
+
+ /* do not trigger IRQ for first command */
+ conf = XDMA_CMD_CONF_CHECK;
+
+ nidx = (nidx + 1) % XDMA_NUM_CMDS;
+ }
+
+ /* undocumented formula to get required number of lines */
+ num_lines = (num_lines * 2) - 1;
+ }
+
+ /* ctrl == 0 indicates engine hasn't started properly; restart it */
+ if (!aspeed_xdma_reg_read(ctx, XDMA_CTRL))
+ aspeed_xdma_reset(ctx);
+
+ cmd->host_addr_lo = (u32)(op->host_addr & 0xFFFFFFFFULL);
+ cmd->host_addr_hi = (u32)(op->host_addr >> 32ULL);
+ cmd->bmc_addr = (bmc_addr & XDMA_CMD_BMC_ADDR) | XDMA_CMD_BMC_CHECK |
+ (op->upstream ? XDMA_CMD_BMC_DIR_US : 0);
+ cmd->conf = conf |
+ FIELD_PREP(XDMA_CMD_CONF_LINE_SIZE, line_size) |
+ FIELD_PREP(XDMA_CMD_CONF_NUM_LINES, num_lines);
+ cmd->comm1_hi = FIELD_PREP(XDMA_CMD_COMM1_HI_HOST_PITCH, line_size) |
+ FIELD_PREP(XDMA_CMD_COMM1_HI_BMC_PITCH, line_size);
+
+ memcpy(ctx->vga_virt, ctx->cmdq, XDMA_CMDQ_SIZE);
+
+ if (op->upstream)
+ set_bit(XDMA_UPSTREAM, &ctx->flags);
+ else
+ clear_bit(XDMA_UPSTREAM, &ctx->flags);
+
+ set_bit(XDMA_IN_PRG, &ctx->flags);
+
+ aspeed_xdma_reg_write(ctx, XDMA_BMC_CMD_QUEUE_WRITEP,
+ nidx * XDMA_CMD_SIZE);
+ ctx->cmd_idx = nidx;
+}
+
+static void aspeed_xdma_done(struct aspeed_xdma *ctx)
+{
+ if (ctx->current_client) {
+ clear_bit(XDMA_IN_PRG, &ctx->current_client->flags);
+
+ ctx->current_client = NULL;
+ }
+
+ clear_bit(XDMA_IN_PRG, &ctx->flags);
+ wake_up_interruptible_all(&ctx->wait);
+}
+
+static irqreturn_t aspeed_xdma_irq(int irq, void *arg)
+{
+ struct aspeed_xdma *ctx = arg;
+ u32 status = aspeed_xdma_reg_read(ctx, XDMA_STATUS);
+
+ if (status & XDMA_STATUS_US_COMP) {
+ if (test_bit(XDMA_UPSTREAM, &ctx->flags))
+ aspeed_xdma_done(ctx);
+ }
+
+ if (status & XDMA_STATUS_DS_COMP) {
+ if (!test_bit(XDMA_UPSTREAM, &ctx->flags))
+ aspeed_xdma_done(ctx);
+ }
+
+ aspeed_xdma_reg_write(ctx, XDMA_STATUS, status);
+
+ return IRQ_HANDLED;
+}
+
+static int aspeed_xdma_init_mem(struct aspeed_xdma *ctx)
+{
+ int rc;
+ u32 scu_conf = 0;
+ u32 mem_size = 0x20000000;
+ const u32 pcie_conf = SCU_PCIE_CONF_VGA_EN | SCU_PCIE_CONF_VGA_EN_MSI |
+ SCU_PCIE_CONF_VGA_EN_MCTP | SCU_PCIE_CONF_VGA_EN_IRQ |
+ SCU_PCIE_CONF_VGA_EN_DMA | SCU_PCIE_CONF_BMC_EN_MSI |
+ SCU_PCIE_CONF_BMC_EN_MCTP | SCU_PCIE_CONF_BMC_EN_IRQ |
+ SCU_PCIE_CONF_BMC_EN_DMA | SCU_PCIE_CONF_RSVD;
+ const u32 mem_sizes[4] = { 0x8000000, 0x10000000, 0x20000000,
+ 0x40000000 };
+ const u32 vga_sizes[4] = { 0x800000, 0x1000000, 0x2000000, 0x4000000 };
+ void __iomem *sdmc_base = ioremap(0x1e6e0000, 0x100);
+
+ regmap_write(ctx->scu, SCU_PCIE_CONF, pcie_conf);
+
+ regmap_read(ctx->scu, SCU_STRAP, &scu_conf);
+ ctx->vga_size = vga_sizes[FIELD_GET(SCU_STRAP_VGA_MEM, scu_conf)];
+
+ if (sdmc_base) {
+ u32 sdmc = readl(sdmc_base + SDMC_CONF);
+ u32 remap = readl(sdmc_base + SDMC_REMAP);
+
+ remap |= SDMC_REMAP_MAGIC;
+ writel(remap, sdmc_base + SDMC_REMAP);
+ remap = readl(sdmc_base + SDMC_REMAP);
+
+ mem_size = mem_sizes[sdmc & SDMC_CONF_MEM];
+ iounmap(sdmc_base);
+ }
+
+ ctx->vga_phys = (mem_size - ctx->vga_size) + 0x80000000;
+
+ ctx->cmdq = devm_kzalloc(ctx->dev, XDMA_CMDQ_SIZE, GFP_KERNEL);
+ if (!ctx->cmdq) {
+ dev_err(ctx->dev, "Failed to allocate command queue.\n");
+ return -ENOMEM;
+ }
+
+ rc = dma_set_mask_and_coherent(ctx->dev, DMA_BIT_MASK(32));
+ if (rc) {
+ dev_err(ctx->dev, "Failed to set DMA mask: %d.\n", rc);
+ return rc;
+ }
+
+ rc = dma_declare_coherent_memory(ctx->dev, ctx->vga_phys,
+ ctx->vga_phys, ctx->vga_size);
+ if (rc) {
+ dev_err(ctx->dev, "Failed to declare coherent memory: %d.\n",
+ rc);
+ return rc;
+ }
+
+ ctx->vga_virt = dma_alloc_coherent(ctx->dev, ctx->vga_size,
+ &ctx->vga_dma, GFP_KERNEL);
+ if (!ctx->vga_virt) {
+ dev_err(ctx->dev, "Failed to allocate DMA.\n");
+ dma_release_declared_memory(ctx->dev);
+ return -ENOMEM;
+ }
+
+ dev_dbg(ctx->dev, "VGA mapped at phys[%08x], size[%08x].\n",
+ ctx->vga_phys, ctx->vga_size);
+
+ return 0;
+}
+
+static int aspeed_xdma_probe(struct platform_device *pdev)
+{
+ int irq;
+ int rc;
+ struct resource *res;
+ struct device *dev = &pdev->dev;
+ struct aspeed_xdma *ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->dev = dev;
+ platform_set_drvdata(pdev, ctx);
+ init_waitqueue_head(&ctx->wait);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ctx->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(ctx->base)) {
+ dev_err(dev, "Unable to ioremap registers\n");
+ return PTR_ERR(ctx->base);
+ }
+
+ irq = irq_of_parse_and_map(dev->of_node, 0);
+ if (!irq) {
+ dev_err(dev, "Unable to find IRQ\n");
+ return -ENODEV;
+ }
+
+ rc = devm_request_irq(dev, irq, aspeed_xdma_irq, IRQF_SHARED,
+ DEVICE_NAME, ctx);
+ if (rc < 0) {
+ dev_err(dev, "Unable to request IRQ %d\n", irq);
+ return rc;
+ }
+
+ ctx->scu = syscon_regmap_lookup_by_compatible("aspeed,ast2500-scu");
+ if (IS_ERR(ctx->scu)) {
+ dev_err(ctx->dev, "Unable to grab SCU regs\n");
+ return PTR_ERR(ctx->scu);
+ }
+
+ ctx->reset = devm_reset_control_get_exclusive(dev, NULL);
+ if (IS_ERR(ctx->reset)) {
+ dev_err(dev, "Unable to request reset control\n");
+ return PTR_ERR(ctx->reset);
+ }
+
+ reset_control_deassert(ctx->reset);
+
+ msleep(10);
+
+ rc = aspeed_xdma_init_mem(ctx);
+ if (rc) {
+ reset_control_assert(ctx->reset);
+ return rc;
+ }
+
+ aspeed_xdma_init_eng(ctx);
+
+ return 0;
+}
+
+static int aspeed_xdma_remove(struct platform_device *pdev)
+{
+ struct aspeed_xdma *ctx = platform_get_drvdata(pdev);
+
+ dma_free_coherent(ctx->dev, ctx->vga_size, ctx->vga_virt,
+ ctx->vga_dma);
+ dma_release_declared_memory(ctx->dev);
+ reset_control_assert(ctx->reset);
+
+ return 0;
+}
+
+static const struct of_device_id aspeed_xdma_match[] = {
+ { .compatible = "aspeed,ast2500-xdma" },
+ { },
+};
+
+static struct platform_driver aspeed_xdma_driver = {
+ .probe = aspeed_xdma_probe,
+ .remove = aspeed_xdma_remove,
+ .driver = {
+ .name = DEVICE_NAME,
+ .of_match_table = aspeed_xdma_match,
+ },
+};
+
+module_platform_driver(aspeed_xdma_driver);
+
+MODULE_AUTHOR("Eddie James");
+MODULE_DESCRIPTION("Aspeed XDMA Engine Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/aspeed-xdma.h b/include/uapi/linux/aspeed-xdma.h
new file mode 100644
index 0000000..9c1659d
--- /dev/null
+++ b/include/uapi/linux/aspeed-xdma.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright IBM Corp 2019 */
+
+#ifndef _UAPI_LINUX_ASPEED_XDMA_H_
+#define _UAPI_LINUX_ASPEED_XDMA_H_
+
+#include <linux/types.h>
+
+/*
+ * aspeed_xdma_op
+ *
+ * upstream: boolean indicating the direction of the DMA operation; upstream
+ * means a transfer from the BMC to the host
+ *
+ * host_addr: the DMA address on the host side, typically configured by PCI
+ * subsystem
+ *
+ * len: the size of the transfer in bytes; it should be a multiple of 16 bytes
+ */
+struct aspeed_xdma_op {
+ __u8 upstream;
+ __u64 host_addr;
+ __u32 len;
+} __packed;
+
+#endif /* _UAPI_LINUX_ASPEED_XDMA_H_ */
--
1.8.3.1
Document the bindings.
Signed-off-by: Eddie James <[email protected]>
---
.../devicetree/bindings/misc/aspeed,xdma.txt | 23 ++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 Documentation/devicetree/bindings/misc/aspeed,xdma.txt
diff --git a/Documentation/devicetree/bindings/misc/aspeed,xdma.txt b/Documentation/devicetree/bindings/misc/aspeed,xdma.txt
new file mode 100644
index 0000000..85e82ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/aspeed,xdma.txt
@@ -0,0 +1,23 @@
+* Device tree bindings for the Aspeed XDMA engine
+
+The XDMA Engine embedded in the AST2500 SOC can perform automatic DMA
+operations over PCI between the AST2500 (acting as a BMC) and a host processor.
+
+Required properties:
+
+ - compatible "aspeed,ast2500-xdma"
+ - reg contains the offset and length of the memory region
+ assigned to the XDMA registers
+ - resets reset specifier for the syscon reset associated with
+ the XDMA engine
+ - interrupts the interrupt associated with the XDMA engine on this
+ platform
+
+Example:
+
+ xdma@1e6e7000 {
+ compatible = "aspeed,ast2500-xdma";
+ reg = <0x1e6e7000 0x100>;
+ resets = <&syscon ASPEED_RESET_XDMA>;
+ interrupts = <6>;
+ };
--
1.8.3.1
On Mon, Mar 4, 2019 at 10:37 PM Eddie James <[email protected]> wrote:
>
> The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
> between the SOC (acting as a BMC) and a host processor in a server.
>
> This commit adds a driver to control the XDMA engine and adds functions
> to initialize the hardware and memory and start DMA operations.
>
> Signed-off-by: Eddie James <[email protected]>
Hi Eddie,
Thanks for your submission! Overall this looks well-implemented, but
I fear we already have too many ways of doing the same thing at
the moment, and I would hope to avoid adding yet another user space
interface for a specific hardware that does this.
Your interface appears to be a fairly low-level variant, just doing
single DMA transfers through ioctls, but configuring the PCIe
endpoint over sysfs.
Please have a look at the drivers/pci/endpoint framework first
and see if you can work on top of that interface instead.
Even if it doesn't quite do what you need here, we may be
able to extend it in a way that works for you, and lets others
use the same user interface extensions in the future.
It may also be necessary to split out the DMA engine portion
into a regular drivers/dma/ back-end to make that fit in with
the PCIe endpoint framework.
If you have already tried this without success, please let us
know in the description what problems you have hit, and why you
decided to create a new framework instead.
> +/*
> + * aspeed_xdma_op
> + *
> + * upstream: boolean indicating the direction of the DMA operation; upstream
> + * means a transfer from the BMC to the host
> + *
> + * host_addr: the DMA address on the host side, typically configured by PCI
> + * subsystem
> + *
> + * len: the size of the transfer in bytes; it should be a multiple of 16 bytes
> + */
> +struct aspeed_xdma_op {
> + __u8 upstream;
> + __u64 host_addr;
> + __u32 len;
> +} __packed;
Side-note: packed structures are generally not great user space
interfaces. Regardless of where we end up with this, I'd recommend
naturally aligning each member inside of the structure, and using
explicit padding here.
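A naturally aligned layout with explicit padding might look like the sketch
below (for illustration only; the field order and the pad member are
assumptions, not something taken from the patch or from the review):

#include <linux/types.h>

/* Sketch: 64-bit member first, no __packed, padding made explicit. */
struct aspeed_xdma_op {
	__u64 host_addr;	/* naturally aligned at offset 0 */
	__u32 len;		/* offset 8 */
	__u8 upstream;		/* offset 12 */
	__u8 pad[3];		/* explicit padding; total size 16 bytes */
};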
Arnd
On 3/5/19 2:01 AM, Arnd Bergmann wrote:
> On Mon, Mar 4, 2019 at 10:37 PM Eddie James <[email protected]> wrote:
>> The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
>> between the SOC (acting as a BMC) and a host processor in a server.
>>
>> This commit adds a driver to control the XDMA engine and adds functions
>> to initialize the hardware and memory and start DMA operations.
>>
>> Signed-off-by: Eddie James <[email protected]>
> Hi Eddie,
>
> Thanks for your submission! Overall this looks well-implemented, but
> I fear we already have too many ways of doing the same thing at
> the moment, and I would hope to avoid adding yet another user space
> interface for a specific hardware that does this.
>
> Your interface appears to be a fairly low-level variant, just doing
> single DMA transfers through ioctls, but configuring the PCIe
> endpoint over sysfs.
Hi, thanks for the quick response!
There is actually no PCIe configuration done in this driver. The two
sysfs entries control the system control unit (SCU) on the AST2500
purely to enable and disable entire PCIe devices. It might be possible
to control those devices more finely with a PCI endpoint driver, but
there is no need to do so. The XDMA engine does that by itself to
perform DMA fairly automatically.
If the sysfs entries are really troublesome, we can probably remove
those and find another way to control the SCU.
>
> Please have a look at the drivers/pci/endpoint framework first
> and see if you can work on top of that interface instead.
> Even if it doesn't quite do what you need here, we may be
> able to extend it in a way that works for you, and lets others
> use the same user interface extensions in the future.
>
> It may also be necessary to split out the DMA engine portion
> into a regular drivers/dma/ back-end to make that fit in with
> the PCIe endpoint framework.
Right, I did look into the normal DMA framework. There were a couple of
problems. First and foremost, the "device" (really, host processor)
address that we use is 64 bit, but the AST2500 is of course 32 bit. So I
couldn't find a good way to get the address through the DMA API into the
driver. It's entirely possible I missed something there though.
The other issue was that the vast majority of the DMA framework was
unused, resulting in a large amount of boilerplate that did nothing
except satisfy the API... I thought simplicity would be better in this case.
Let me know what you think... I could certainly switch to ioctl instead
of the write() if that's better. Or if you really think the DMA
framework is required here, let me know.
Thanks,
Eddie
>
> If you have already tried this without success, please let us
> know in the description what problems you have hit, and why you
> decided to create a new framework instead.
>
>> +/*
>> + * aspeed_xdma_op
>> + *
>> + * upstream: boolean indicating the direction of the DMA operation; upstream
>> + * means a transfer from the BMC to the host
>> + *
>> + * host_addr: the DMA address on the host side, typically configured by PCI
>> + * subsystem
>> + *
>> + * len: the size of the transfer in bytes; it should be a multiple of 16 bytes
>> + */
>> +struct aspeed_xdma_op {
>> + __u8 upstream;
>> + __u64 host_addr;
>> + __u32 len;
>> +} __packed;
> Side-note: packed structures are generally not great user space
> interfaces. Regardless of where we end up with this, I'd recommend
> naturally aligning each member inside of the structure, and using
> explicit padding here.
Understood, thanks.
>
>
> Arnd
>
On Wed, 6 Mar 2019, at 08:15, Eddie James wrote:
>
> On 3/5/19 2:01 AM, Arnd Bergmann wrote:
> > On Mon, Mar 4, 2019 at 10:37 PM Eddie James <[email protected]> wrote:
> >> The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
> >> between the SOC (acting as a BMC) and a host processor in a server.
> >>
> >> This commit adds a driver to control the XDMA engine and adds functions
> >> to initialize the hardware and memory and start DMA operations.
> >>
> >> Signed-off-by: Eddie James <[email protected]>
> > Hi Eddie,
> >
> > Thanks for your submission! Overall this looks well-implemented, but
> > I fear we already have too many ways of doing the same thing at
> > the moment, and I would hope to avoid adding yet another user space
> > interface for a specific hardware that does this.
> >
> > Your interface appears to be a fairly low-level variant, just doing
> > single DMA transfers through ioctls, but configuring the PCIe
> > endpoint over sysfs.
>
> Hi, thanks for the quick response!
>
> There is actually no PCIe configuration done in this driver. The two
> sysfs entries control the system control unit (SCU) on the AST2500
> purely to enable and disable entire PCIe devices. It might be possible
> to control those devices more finely with a PCI endpoint driver, but
> there is no need to do so. The XDMA engine does that by itself to
> perform DMA fairly automatically.
I had a series a while back to expose random bits from devices in sysfs. It got
shot down pretty well, but the main contention was over the devicetree
bindings.
I think we could revive it as a library-type thing that drivers can use to expose
bits like what you're describing without putting the grubby details in the
devicetree. Then we would have a consistent approach to exposing otherwise
hard to describe functions (which is what a lot of a BMC turns out to be).
Andrew
On Tue, Mar 5, 2019 at 10:45 PM Eddie James <[email protected]> wrote:
> On 3/5/19 2:01 AM, Arnd Bergmann wrote:
> > On Mon, Mar 4, 2019 at 10:37 PM Eddie James <[email protected]> wrote:
> >> The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
> >> between the SOC (acting as a BMC) and a host processor in a server.
> >>
> >> This commit adds a driver to control the XDMA engine and adds functions
> >> to initialize the hardware and memory and start DMA operations.
> >>
> >> Signed-off-by: Eddie James <[email protected]>
> > Hi Eddie,
> >
> > Thanks for your submission! Overall this looks well-implemented, but
> > I fear we already have too many ways of doing the same thing at
> > the moment, and I would hope to avoid adding yet another user space
> > interface for a specific hardware that does this.
> >
> > Your interface appears to be a fairly low-level variant, just doing
> > single DMA transfers through ioctls, but configuring the PCIe
> > endpoint over sysfs.
>
> Hi, thanks for the quick response!
>
> There is actually no PCIe configuration done in this driver. The two
> sysfs entries control the system control unit (SCU) on the AST2500
> purely to enable and disable entire PCIe devices. It might be possible
> to control those devices more finely with a PCI endpoint driver, but
> there is no need to do so. The XDMA engine does that by itself to
> perform DMA fairly automatically.
>
> If the sysfs entries are really troublesome, we can probably remove
> those and find another way to control the SCU.
I think the main advantage of tying this to a PCIe endpoint driver
is that this would give us a logical object in the kernel that we
can add the user space interface to, and have the protocol on
top of it be portable between different SoCs.
> > Please have a look at the drivers/pci/endpoint framework first
> > and see if you can work on top of that interface instead.
> > Even if it doesn't quite do what you need here, we may be
> > able to extend it in a way that works for you, and lets others
> > use the same user interface extensions in the future.
> >
> > It may also be necessary to split out the DMA engine portion
> > into a regular drivers/dma/ back-end to make that fit in with
> > the PCIe endpoint framework.
>
> Right, I did look into the normal DMA framework. There were a couple of
> problems. First and foremost, the "device" (really, host processor)
> address that we use is 64 bit, but the AST2500 is of course 32 bit. So I
> couldn't find a good way to get the address through the DMA API into the
> driver. It's entirely possible I missed something there though.
32-bit ARM SoCs can be built with a 64-bit dma_addr_t. Would that
help you here?
> The other issue was that the vast majority of the DMA framework was
> unused, resulting in a large amount of boilerplate that did nothing
> except satisfy the API... I thought simplicity would be better in this case.
Simplicity is important indeed, but we have to weigh it against
having a consistent interface. What the dmaengine driver would
give us in combination with the PCIe endpoint driver is that it abstracts
the hardware from the protocol on top, which could then be done
in a way that is not specific to an AST2xxx chip.
> Let me know what you think... I could certainly switch to ioctl instead
> of the write() if that's better. Or if you really think the DMA
> framework is required here, let me know.
I don't think that replacing the ioctl() with a write() call specifically
would make much of a difference here. The question I'd like to
discuss further is what high-level user space interface you actually
need in order to implement what kind of functionality. We can then
look at whether this interface can be implemented on top of a
PCIe endpoint and a dmaengine driver in a portable way. If all
of those are true, then I'd definitely go with the modular approach
of having two standard drivers for the PCIe endpoint (should be
a trivial wrapper) and the dma engine (not trivial, but there are
many examples), plus a generic front-end in
drivers/pci/endpoint/functions/.
Arnd
On 3/6/19 4:48 AM, Arnd Bergmann wrote:
> On Tue, Mar 5, 2019 at 10:45 PM Eddie James <[email protected]> wrote:
>> On 3/5/19 2:01 AM, Arnd Bergmann wrote:
>>> On Mon, Mar 4, 2019 at 10:37 PM Eddie James <[email protected]> wrote:
>>>> The XDMA engine embedded in the AST2500 SOC performs PCI DMA operations
>>>> between the SOC (acting as a BMC) and a host processor in a server.
>>>>
>>>> This commit adds a driver to control the XDMA engine and adds functions
>>>> to initialize the hardware and memory and start DMA operations.
>>>>
>>>> Signed-off-by: Eddie James <[email protected]>
>>> Hi Eddie,
>>>
>>> Thanks for your submission! Overall this looks well-implemented, but
>>> I fear we already have too many ways of doing the same thing at
>>> the moment, and I would hope to avoid adding yet another user space
>>> interface for a specific hardware that does this.
>>>
>>> Your interface appears to be a fairly low-level variant, just doing
>>> single DMA transfers through ioctls, but configuring the PCIe
>>> endpoint over sysfs.
>> Hi, thanks for the quick response!
>>
>> There is actually no PCIe configuration done in this driver. The two
>> sysfs entries control the system control unit (SCU) on the AST2500
>> purely to enable and disable entire PCIe devices. It might be possible
>> to control those devices more finely with a PCI endpoint driver, but
>> there is no need to do so. The XDMA engine does that by itself to
>> perform DMA fairly automatically.
>>
>> If the sysfs entries are really troublesome, we can probably remove
>> those and find another way to control the SCU.
> I think the main advantage of tying this to a PCIe endpoint driver
> is that this would give us a logical object in the kernel that we
> can add the user space interface to, and have the protocol on
> top of it be portable between different SoCs.
>
>>> Please have a look at the drivers/pci/endpoint framework first
>>> and see if you can work on top of that interface instead.
>>> Even if it doesn't quite do what you need here, we may be
>>> able to extend it in a way that works for you, and lets others
>>> use the same user interface extensions in the future.
>>>
>>> It may also be necessary to split out the DMA engine portion
>>> into a regular drivers/dma/ back-end to make that fit in with
>>> the PCIe endpoint framework.
>> Right, I did look into the normal DMA framework. There were a couple of
>> problems. First and foremost, the "device" (really, host processor)
>> address that we use is 64 bit, but the AST2500 is of course 32 bit. So I
>> couldn't find a good way to get the address through the DMA API into the
>> driver. It's entirely possible I missed something there though.
> 32-bit ARM SoCs can be built with a 64-bit dma_addr_t. Would that
> help you here?
Yep, thanks, that's helpful.
>
>> The other issue was that the vast majority of the DMA framework was
>> unused, resulting in a large amount of boilerplate that did nothing
>> except satisfy the API... I thought simplicity would be better in this case.
> Simplicity is important indeed, but we have to weigh it against
> having a consistent interface. What the dmaengine driver would
> give us in combination with the PCIe endpoint driver is that it abstracts
> the hardware from the protocol on top, which could then be done
> in a way that is not specific to an AST2xxx chip.
>
>> Let me know what you think... I could certainly switch to ioctl instead
>> of the write() if that's better. Or if you really think the DMA
>> framework is required here, let me know.
> I don't think that replacing the ioctl() with a write() call specifically
> would make much of a difference here. The question I'd like to
> discuss further is what high-level user space interface you actually
> need in order to implement what kind of functionality. We can then
> look at whether this interface can be implemented on top of a
> PCIe endpoint and a dmaengine driver in a portable way. If all
> of those are true, then I'd definitely go with the modular approach
> of having two standard drivers for the PCIe endpoint (should be
> a trivial wrapper) and the dma engine (not trivial, but there are
> many examples), plus a generic front-end in
> drivers/pci/endpoint/functions/.
Hi Arnd,
Let me describe the top level interface we really need. The objective is
just to transfer arbitrary data between the two memory spaces (memory on
the AST2500 as the BMC, where the driver is running, and the memory on
the host processor). The user on the BMC (in user space; I can't think
of a use case for another driver needing to access this interface) has
the host address, transfer size, and, if it's a write, the data. The user
needs to pass this into the driver and, if it's a read, retrieve the
transferred data.
I did start trying to implement the dmaengine framework, and I think it
could technically work. The addressing is no longer a problem, thanks to
your tip. However, I realized there are some other issues.
The main problem is that the only memory that the XDMA engine hardware
can access is the VGA reserved memory area on the AST2xxx. So I don't
see how it can ever be a pure dmaengine driver; it would always need an
additional interface or something to handle that memory area. If I
completed the dmaengine framework, any and all users would be required
to go through an additional step to get memory in the reserved area and
copy in/out of there. The way the driver stands, this memory management
is integrated, resulting in a fairly clean interface, though of course
it is unique.
As for the PCIe endpoint part, I'm not sure it fits this driver. I could
drop the sysfs entries and find another way to configure the SCU for
now... this driver really doesn't have anything to do with PCIe, except
for the fact that the XDMA hardware uses PCIe to do the actual work of
the data transfer.
What do you think? One other thought I had was that the driver might be
more suitable to go in drivers/soc/ as it is very specific to the
AST2xxx. But, up to you.
Thanks,
Eddie
>
> Arnd
>